Skip to content

Commit

Permalink
[WIP] Implement Expression tools in Galaxy.
Browse files Browse the repository at this point in the history
- Tool definition language and plumbing and datatype for expressing expressions as jobs.
- Allow connecting expression tools to parameters in workflows; this will delay evaluation of the workflow so the calculated value can be used.
- Example test expression tools for testing and demonstration.
- [WIP] Workflow expression module to allow users to specify arbitrary expressions.
  • Loading branch information
jmchilton committed Jan 18, 2018
1 parent 8cc6165 commit a44455a
Show file tree
Hide file tree
Showing 22 changed files with 551 additions and 6 deletions.
1 change: 1 addition & 0 deletions config/datatypes_conf.xml.sample
Expand Up @@ -385,6 +385,7 @@
<!-- End RGenetics Datatypes -->
<datatype extension="ipynb" type="galaxy.datatypes.text:Ipynb" display_in_upload="true" />
<datatype extension="json" type="galaxy.datatypes.text:Json" display_in_upload="true" />
<datatype extension="expression.json" type="galaxy.datatypes.text:ExpressionJson" display_in_upload="true" />
<!-- graph datatypes -->
<datatype extension="xgmml" type="galaxy.datatypes.graph:Xgmml" display_in_upload="true"/>
<datatype extension="sif" type="galaxy.datatypes.graph:Sif" display_in_upload="true"/>
Expand Down
24 changes: 24 additions & 0 deletions lib/galaxy/datatypes/text.py
Expand Up @@ -105,6 +105,30 @@ def display_peek(self, dataset):
return "JSON file (%s)" % (nice_size(dataset.get_size()))


class ExpressionJson(Json):
    """ Represents the non-data input or output to a tool or workflow.

    The ``json_type`` metadata element records which kind of JSON value the
    dataset's file holds.
    """
    # NOTE(review): datatypes_conf.xml.sample registers this class under the
    # "expression.json" extension - confirm "json" is the intended file_ext.
    file_ext = "json"
    MetadataElement(name="json_type", default=None, desc="JavaScript or JSON type of expression", readonly=True, visible=True, no_value=None)

    def set_meta(self, dataset, **kwd):
        """ Set ``dataset.metadata.json_type`` from the dataset's JSON content.

        The file at ``dataset.file_name`` is parsed with ``json.load`` and the
        type of its top-level value is stored as one of "null", "boolean",
        "int", "float", "string", "list" or "object". Values of any other
        type are recorded as "null". Extra keyword arguments are ignored.
        """
        with open(dataset.file_name) as f:
            obj = json.load(f)

        # bool is tested before int because bool is a subclass of int in
        # Python; otherwise JSON true/false would be reported as "int".
        if isinstance(obj, bool):
            json_type = "boolean"
        elif isinstance(obj, int):
            json_type = "int"
        elif isinstance(obj, float):
            json_type = "float"
        elif isinstance(obj, type(u"")):
            # type(u"") is the text type on both Python 2 and Python 3.
            json_type = "string"
        elif isinstance(obj, list):
            json_type = "list"
        elif isinstance(obj, dict):
            json_type = "object"
        else:
            json_type = "null"

        dataset.metadata.json_type = json_type


class Ipynb(Json):
file_ext = "ipynb"

Expand Down
69 changes: 68 additions & 1 deletion lib/galaxy/tools/__init__.py
Expand Up @@ -60,6 +60,7 @@
from galaxy.tools.parameters.grouping import Conditional, ConditionalWhen, Repeat, Section, UploadDataset
from galaxy.tools.parameters.input_translation import ToolInputTranslator
from galaxy.tools.parameters.meta import expand_meta_parameters
from galaxy.tools.parameters.wrapped_json import json_wrap
from galaxy.tools.parser import (
get_tool_source,
get_tool_source_from_representation,
Expand Down Expand Up @@ -2104,6 +2105,71 @@ def exec_before_job(self, app, inp_data, out_data, param_dict=None):
out.close()


class ExpressionTool(Tool):
    """ Tool whose job evaluates an expression rather than a tool-supplied command.

    The command is always ``expressions.EXPRESSION_SCRIPT_CALL``. Before the
    job runs, the evaluation script and a JSON file holding the job inputs,
    the expression and the output descriptions are written into the job's
    working directory.
    """
    tool_type = 'expression'
    EXPRESSION_INPUTS_NAME = "_expression_inputs_.json"

    def parse_command(self, tool_source):
        """ Set the fixed script call as the command and record the expression.

        :raises Exception: if ``tool_source`` does not supply an expression.
        """
        self.command = expressions.EXPRESSION_SCRIPT_CALL
        self.interpreter = None
        expression = tool_source.parse_expression()
        if expression is None:
            # parse_expression() yields None when no expression is declared;
            # fail with a clear message instead of an AttributeError on None.
            raise Exception("Expression tools must declare an expression.")
        self._expression = expression.strip()

    def parse_outputs(self, tool_source):
        """ Parse outputs and reject any that are not expression outputs.

        :raises Exception: if an output collection is declared or an output
            has no ``from_expression`` attribute.
        """
        # Setup self.outputs and self.output_collections
        super(ExpressionTool, self).parse_outputs(tool_source)

        # Validate these outputs for expression tools.
        if len(self.output_collections) != 0:
            message = "Expression tools may not declare output collections at this time."
            raise Exception(message)
        for output in self.outputs.values():
            if not hasattr(output, "from_expression"):
                message = "Expression tools may not declare output datasets at this time."
                raise Exception(message)

    def exec_before_job(self, app, inp_data, out_data, param_dict=None):
        """ Write the evaluation script and its JSON inputs file for the job.

        The inputs file contains the wrapped job parameters (``job``), the
        expression (``script``) and one entry per output (``outputs``) with
        its name, ``from_expression`` and target path.

        :raises Exception: if ``param_dict`` is None.
        """
        super(ExpressionTool, self).exec_before_job(app, inp_data, out_data, param_dict=param_dict)

        # This guard must run before param_dict is first indexed, otherwise a
        # missing param_dict surfaces as a TypeError and the guard is dead code.
        if param_dict is None:
            raise Exception("Internal error - param_dict is empty.")

        working_directory = os.path.join(param_dict["__local_working_directory__"], 'working')
        expression_inputs_path = os.path.join(working_directory, ExpressionTool.EXPRESSION_INPUTS_NAME)

        outputs = []
        # Only the output names are needed; keys() replaces the Python 2-only
        # iteritems() call.
        for out_name in out_data.keys():
            output_def = self.outputs[out_name]
            wrapped_data = param_dict.get(out_name)
            outputs.append(dict(
                name=out_name,
                from_expression=output_def.from_expression,
                path=str(wrapped_data),
            ))

        job = {}
        json_wrap(self.inputs, param_dict, job, handle_files='OBJECT')
        expression_inputs = {
            'job': job,
            'script': self._expression,
            'outputs': outputs,
        }
        expressions.write_evalute_script(working_directory)
        with open(expression_inputs_path, "w") as f:
            json.dump(expression_inputs, f)

    def parse_environment_variables(self, tool_source):
        """ Setup environment variable for inputs file.

        Appends a ``GALAXY_EXPRESSION_INPUTS`` entry, whose template is the
        inputs file name, to the environment variables parsed by the parent
        class and returns the resulting list.
        """
        environment_variables_raw = super(ExpressionTool, self).parse_environment_variables(tool_source)
        expression_script_inputs = dict(
            name="GALAXY_EXPRESSION_INPUTS",
            template=ExpressionTool.EXPRESSION_INPUTS_NAME,
        )
        environment_variables_raw.append(expression_script_inputs)
        return environment_variables_raw


class DataSourceTool(OutputParameterJSONTool):
"""
Alternate implementation of Tool for data_source tools -- those that
Expand Down Expand Up @@ -2352,6 +2418,7 @@ class UsesExpressions:
requires_js_runtime = True

def _expression_environment(self, hda):
# TODO: use json_wrap HDA stuff for this...
raw_as_dict = hda.to_dict()
filtered_as_dict = {}
# We are more conservative with the API provided to tools
Expand Down Expand Up @@ -2737,7 +2804,7 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history

# Populate tool_type to ToolClass mappings
tool_types = {}
for tool_class in [Tool, SetMetadataTool, OutputParameterJSONTool,
for tool_class in [Tool, SetMetadataTool, OutputParameterJSONTool, ExpressionTool,
DataManagerTool, DataSourceTool, AsyncDataSourceTool,
UnzipCollectionTool, ZipCollectionTool, MergeCollectionTool, RelabelFromFileTool, FilterFromFileTool,
DataDestinationTool]:
Expand Down
8 changes: 8 additions & 0 deletions lib/galaxy/tools/expressions/__init__.py
@@ -1,12 +1,20 @@
from .evaluation import evaluate
from .sandbox import execjs, interpolate
from .util import jshead, find_engine
from .script import (
write_evalute_script,
EXPRESSION_SCRIPT_CALL,
EXPRESSION_SCRIPT_NAME,
)


__all__ = (
'evaluate',
'execjs',
'EXPRESSION_SCRIPT_CALL',
'EXPRESSION_SCRIPT_NAME',
'find_engine',
'interpolate',
'jshead',
'write_evalute_script',
)
15 changes: 15 additions & 0 deletions lib/galaxy/tools/expressions/script.py
@@ -0,0 +1,15 @@
import os

# File name of the generated evaluation script and the command that runs it.
EXPRESSION_SCRIPT_NAME = "_evaluate_expression_.py"
EXPRESSION_SCRIPT_CALL = "python %s" % EXPRESSION_SCRIPT_NAME


def write_evalute_script(in_directory):
    """ Write the script that evaluates expressions in Galaxy jobs.

    The script is a single line that imports ``run`` from
    ``galaxy_ext.expressions.handle_job`` and calls it.

    :param in_directory: directory the script file is created in
    :returns: path of the written script
    """
    script_path = os.path.join(in_directory, EXPRESSION_SCRIPT_NAME)
    script_body = 'from galaxy_ext.expressions.handle_job import run; run()'
    with open(script_path, "w") as script_file:
        script_file.write(script_body)
    return script_path
23 changes: 23 additions & 0 deletions lib/galaxy/tools/parameters/wrapped_json.py
Expand Up @@ -46,10 +46,18 @@ def _json_wrap_input(input, value, handle_files="SKIP"):
elif input_type == "data" and input.multiple:
if handle_files == "SKIP":
return SKIP_INPUT
# TODO: map over _hda_to_object
raise NotImplementedError()
elif input_type == "data":
if handle_files == "SKIP":
return SKIP_INPUT
elif handle_files == "OBJECT":
if value:
if isinstance(value, list):
value = value[0]
return _hda_to_object(value)
else:
return None
raise NotImplementedError()
elif input_type == "data_collection":
if handle_files == "SKIP":
Expand All @@ -72,6 +80,21 @@ def _json_wrap_input(input, value, handle_files="SKIP"):
return json_value


def _hda_to_object(hda):
hda_dict = hda.to_dict()
metadata_dict = {}

for key, value in hda_dict.items():
if key.startswith("metadata_"):
metadata_dict[key[len("metadata_"):]] = value

return {
'file_ext': hda_dict['file_ext'],
'name': hda_dict['name'],
'metadata': metadata_dict,
}


def _cast_if_not_none(value, cast_to, empty_to_none=False):
# log.debug("value [%s], type[%s]" % (value, type(value)))
if value is None or (empty_to_none and str(value) == ''):
Expand Down
5 changes: 5 additions & 0 deletions lib/galaxy/tools/parser/interface.py
Expand Up @@ -86,6 +86,11 @@ def parse_command(self):
""" Return string containing command to run.
"""

def parse_expression(self):
    """ Return string containing the expression to evaluate.

    This default implementation returns None.
    """
    return None

@abstractmethod
def parse_environment_variables(self):
""" Return environment variable templates to expose.
Expand Down
26 changes: 25 additions & 1 deletion lib/galaxy/tools/parser/output_objects.py
Expand Up @@ -21,12 +21,13 @@ class ToolOutput(ToolOutputBase):
(format, metadata_source, parent)
"""

dict_collection_visible_keys = ['name', 'format', 'label', 'hidden']
dict_collection_visible_keys = ['name', 'format', 'label', 'hidden', 'output_type']

def __init__(self, name, format=None, format_source=None, metadata_source=None,
parent=None, label=None, filters=None, actions=None, hidden=False,
implicit=False):
super(ToolOutput, self).__init__(name, label=label, filters=filters, hidden=hidden)
self.output_type = "data"
self.format = format
self.format_source = format_source
self.metadata_source = metadata_source
Expand Down Expand Up @@ -67,6 +68,27 @@ def to_dict(self, view='collection', value_mapper=None, app=None):
return as_dict


class ToolExpressionOutput(ToolOutputBase):
    """ Tool output whose value is produced from an expression.

    Carries the declared ``output_type`` and ``from_expression`` and always
    uses the ``expression.json`` format; the remaining attributes are set to
    fixed default values.
    """
    dict_collection_visible_keys = ('name', 'format', 'label', 'hidden', 'output_type')

    def __init__(self, name, output_type, from_expression,
                 label=None, filters=None, actions=None, hidden=False):
        super(ToolExpressionOutput, self).__init__(name, label=label, filters=filters, hidden=hidden)

        # Values supplied by the caller.
        self.from_expression = from_expression
        self.output_type = output_type  # JSON type...
        self.actions = actions

        # Extension that datatypes_conf.xml.sample maps to ExpressionJson.
        self.format = "expression.json"

        # Initialize default values
        self.format_source = self.metadata_source = self.parent = self.from_work_dir = None
        self.implicit = False
        self.change_format = []


class ToolOutputCollection(ToolOutputBase):
"""
Represents a HistoryDatasetCollectionAssociation of output datasets produced
Expand All @@ -82,6 +104,7 @@ class ToolOutputCollection(ToolOutputBase):
</collection>
<outputs>
"""
dict_collection_visible_keys = ('name', 'format', 'label', 'hidden', 'output_type')

def __init__(
self,
Expand All @@ -97,6 +120,7 @@ def __init__(
inherit_metadata=False
):
super(ToolOutputCollection, self).__init__(name, label=label, filters=filters, hidden=hidden)
self.output_type = "collection"
self.collection = True
self.default_format = default_format
self.structure = structure
Expand Down
47 changes: 45 additions & 2 deletions lib/galaxy/tools/parser/xml.py
Expand Up @@ -21,6 +21,7 @@
from .output_actions import ToolOutputActionGroup
from .output_collection_def import dataset_collector_descriptions_from_elem
from .output_objects import (
ToolExpressionOutput,
ToolOutput,
ToolOutputCollection,
ToolOutputCollectionStructure
Expand Down Expand Up @@ -110,6 +111,12 @@ def parse_command(self):
command_el = self._command_el
return ((command_el is not None) and command_el.text) or None

def parse_expression(self):
    """ Return string containing the expression to evaluate, or None.

    None is returned when the root has no ``expression`` child or when that
    element has no text.
    """
    element = self.root.find("expression")
    if element is None:
        return None
    return element.text or None

def parse_environment_variables(self):
environment_variables_el = self.root.find("environment_variables")
if environment_variables_el is None:
Expand Down Expand Up @@ -231,9 +238,12 @@ def _parse(data_elem, **kwds):
data_dict[output_def.name] = output_def
return output_def

map(_parse, out_elem.findall("data"))
def _parse_expression(output_elem, **kwds):
output_def = self._parse_expression_output(output_elem, tool, **kwds)
data_dict[output_def.name] = output_def
return output_def

for collection_elem in out_elem.findall("collection"):
def _parse_collection(collection_elem):
name = collection_elem.get("name")
label = xml_text(collection_elem, "label")
default_format = collection_elem.get("format", "data")
Expand Down Expand Up @@ -287,6 +297,22 @@ def _parse(data_elem, **kwds):
output_collection.outputs[output_name] = data
output_collections[name] = output_collection

for out_child in out_elem.getchildren():
if out_child.tag == "data":
_parse(out_child)
elif out_child.tag == "collection":
_parse_collection(out_child)
elif out_child.tag == "output":
output_type = out_child.get("type")
if output_type == "data":
_parse(out_child)
elif output_type == "collection":
_parse_collection(out_child)
else:
_parse_expression(out_child)
else:
log.warn("Unknown output tag encountered [%s]" % out_child.tag)

for output_def in data_dict.values():
outputs[output_def.name] = output_def
return outputs, output_collections
Expand All @@ -298,6 +324,7 @@ def _parse_output(
default_format="data",
default_format_source=None,
default_metadata_source="",
expression_type=None,
):
output = ToolOutput(data_elem.get("name"))
output_format = data_elem.get("format", default_format)
Expand All @@ -322,6 +349,22 @@ def _parse_output(
output.dataset_collector_descriptions = dataset_collector_descriptions_from_elem(data_elem, legacy=self.legacy_defaults)
return output

def _parse_expression_output(self, output_elem, tool, **kwds):
    """ Build a ToolExpressionOutput from an output element.

    Reads the ``name``, ``type``, ``from``, ``value`` and ``hidden``
    attributes plus the ``label`` text and ``actions`` child of
    ``output_elem``. ``tool`` and ``kwds`` are accepted but not used.
    """
    expression_output = ToolExpressionOutput(
        name=output_elem.get("name"),
        output_type=output_elem.get("type"),
        from_expression=output_elem.get("from"),
    )
    expression_output.path = output_elem.get("value")
    expression_output.label = xml_text(output_elem, "label")

    hidden_raw = output_elem.get("hidden", "")
    expression_output.hidden = string_as_bool(hidden_raw)

    actions_elem = output_elem.find('actions')
    expression_output.actions = ToolOutputActionGroup(expression_output, actions_elem)
    expression_output.dataset_collector_descriptions = []
    return expression_output

def parse_stdio(self):
command_el = self._command_el
detect_errors = None
Expand Down
3 changes: 3 additions & 0 deletions lib/galaxy/tools/parser/yaml.py
Expand Up @@ -54,6 +54,9 @@ def parse_require_login(self, default):
def parse_command(self):
    """ Return the ``command`` entry of the root dictionary, or None if absent.
    """
    return self.root_dict.get("command", None)

def parse_expression(self):
    """ Return the ``expression`` entry of the root dictionary, or None if absent.
    """
    return self.root_dict.get("expression", None)

def parse_environment_variables(self):
    """ Return environment variable templates to expose.

    This implementation always returns an empty list.
    """
    return []

Expand Down

0 comments on commit a44455a

Please sign in to comment.