Skip to content

Commit

Permalink
DatasetMatcherClean - Implement DatasetMatcherFactory to reason for whole tool.
Browse files Browse the repository at this point in the history

In a subsequent commit I'll use this central store of all the inputs for a tool to determine whether summary data about collections can be used instead of processing individual datasets one at a time.

Even this commit, though, uses the abstraction to optimize datatype checking and cache common checks when possible — this should lead to far fewer objects being created when processing a large history.
  • Loading branch information
jmchilton committed Apr 27, 2018
1 parent ca80232 commit efe5d8b
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 14 deletions.
6 changes: 6 additions & 0 deletions lib/galaxy/tools/__init__.py
Expand Up @@ -56,6 +56,10 @@
ToolParameter,
workflow_building_modes,
)
from galaxy.tools.parameters.dataset_matcher import (
set_dataset_matcher_factory,
unset_dataset_matcher_factory,
)
from galaxy.tools.parameters.grouping import Conditional, ConditionalWhen, Repeat, Section, UploadDataset
from galaxy.tools.parameters.input_translation import ToolInputTranslator
from galaxy.tools.parameters.meta import expand_meta_parameters
Expand Down Expand Up @@ -1826,7 +1830,9 @@ def to_json(self, trans, kwd={}, job=None, workflow_building_mode=False):
# create tool model
tool_model = self.to_dict(request_context)
tool_model['inputs'] = []
set_dataset_matcher_factory(request_context, self, state_inputs)
self.populate_model(request_context, self.inputs, state_inputs, tool_model['inputs'])
unset_dataset_matcher_factory(request_context)

# create tool help
tool_help = ''
Expand Down
28 changes: 14 additions & 14 deletions lib/galaxy/tools/parameters/basic.py
Expand Up @@ -26,8 +26,7 @@
from galaxy.web import url_for
from . import validation
from .dataset_matcher import (
DatasetCollectionMatcher,
DatasetMatcher
get_dataset_matcher_factory,
)
from .sanitize import ToolParameterSanitizer
from ..parameters import (
Expand Down Expand Up @@ -1484,14 +1483,15 @@ def get_initial_value(self, trans, other_values):
return None
history = trans.history
if history is not None:
dataset_matcher = DatasetMatcher(trans, self, other_values)
dataset_matcher_factory = get_dataset_matcher_factory(trans)
dataset_matcher = dataset_matcher_factory.dataset_matcher(self, other_values)
if isinstance(self, DataToolParameter):
for hda in reversed(history.active_datasets_and_roles):
match = dataset_matcher.hda_match(hda)
if match:
return match.hda
else:
dataset_collection_matcher = DatasetCollectionMatcher(dataset_matcher)
dataset_collection_matcher = dataset_matcher_factory.dataset_collection_matcher(dataset_matcher)
for hdca in reversed(history.active_dataset_collections):
if dataset_collection_matcher.hdca_match(hdca, reduction=self.multiple):
return hdca
Expand Down Expand Up @@ -1790,7 +1790,8 @@ def to_dict(self, trans, other_values={}):
return d

# prepare dataset/collection matching
dataset_matcher = DatasetMatcher(trans, self, other_values)
dataset_matcher_factory = get_dataset_matcher_factory(trans)
dataset_matcher = dataset_matcher_factory.dataset_matcher(self, other_values)
multiple = self.multiple

# build and append a new select option
Expand Down Expand Up @@ -1822,7 +1823,7 @@ def append(list, hda, name, src, keep=False):
append(d['options']['hda'], hda, '(%s) %s' % (hda_state, hda.name), 'hda', True)

# add dataset collections
dataset_collection_matcher = DatasetCollectionMatcher(dataset_matcher)
dataset_collection_matcher = dataset_matcher_factory.dataset_collection_matcher(dataset_matcher)
for hdca in history.active_dataset_collections:
if dataset_collection_matcher.hdca_match(hdca, reduction=multiple):
append(d['options']['hdca'], hdca, hdca.name, 'hdca')
Expand Down Expand Up @@ -1859,18 +1860,15 @@ def _history_query(self, trans):
dataset_collection_type_descriptions = trans.app.dataset_collections_service.collection_type_descriptions
return history_query.HistoryQuery.from_parameter(self, dataset_collection_type_descriptions)

def match_collections(self, trans, history, dataset_matcher):
def match_collections(self, trans, history, dataset_collection_matcher):
dataset_collections = trans.app.dataset_collections_service.history_dataset_collections(history, self._history_query(trans))
dataset_collection_matcher = DatasetCollectionMatcher(dataset_matcher)

for dataset_collection_instance in dataset_collections:
if not dataset_collection_matcher.hdca_match(dataset_collection_instance):
continue
yield dataset_collection_instance

def match_multirun_collections(self, trans, history, dataset_matcher):
dataset_collection_matcher = DatasetCollectionMatcher(dataset_matcher)

def match_multirun_collections(self, trans, history, dataset_collection_matcher):
for history_dataset_collection in history.active_dataset_collections:
if not self._history_query(trans).can_map_over(history_dataset_collection):
continue
Expand Down Expand Up @@ -1947,10 +1945,12 @@ def to_dict(self, trans, other_values=None):
return d

# prepare dataset/collection matching
dataset_matcher = DatasetMatcher(trans, self, other_values)
dataset_matcher_factory = get_dataset_matcher_factory(trans)
dataset_matcher = dataset_matcher_factory.dataset_matcher(self, other_values)
dataset_collection_matcher = dataset_matcher_factory.dataset_collection_matcher(dataset_matcher)

# append directly matched collections
for hdca in self.match_collections(trans, history, dataset_matcher):
for hdca in self.match_collections(trans, history, dataset_collection_matcher):
d['options']['hdca'].append({
'id' : trans.security.encode_id(hdca.id),
'hid' : hdca.hid,
Expand All @@ -1960,7 +1960,7 @@ def to_dict(self, trans, other_values=None):
})

# append matching subcollections
for hdca in self.match_multirun_collections(trans, history, dataset_matcher):
for hdca in self.match_multirun_collections(trans, history, dataset_collection_matcher):
subcollection_type = self._history_query(trans).can_map_over(hdca).collection_type
d['options']['hdca'].append({
'id' : trans.security.encode_id(hdca.id),
Expand Down
40 changes: 40 additions & 0 deletions lib/galaxy/tools/parameters/dataset_matcher.py
Expand Up @@ -5,6 +5,46 @@
log = getLogger(__name__)


def set_dataset_matcher_factory(trans, tool, param_values):
    """Attach a tool-scoped ``DatasetMatcherFactory`` to the transaction."""
    factory = DatasetMatcherFactory(trans, tool, param_values)
    trans.dataset_matcher_factory = factory


def unset_dataset_matcher_factory(trans):
    """Clear any tool-scoped matcher factory installed on the transaction."""
    setattr(trans, "dataset_matcher_factory", None)


def get_dataset_matcher_factory(trans):
    """Return the matcher factory installed on ``trans``, or a fresh tool-less one.

    Falls back to a bare ``DatasetMatcherFactory`` when no (truthy) factory has
    been set on the transaction, e.g. outside ``populate_model``.
    """
    if getattr(trans, "dataset_matcher_factory", None):
        return trans.dataset_matcher_factory
    return DatasetMatcherFactory(trans)


class DatasetMatcherFactory(object):
    """Central builder for dataset/collection matchers during one tool request.

    When constructed with a ``tool`` and its ``param_values``, the factory
    walks the tool's inputs and records every data-oriented parameter in
    ``self._data_inputs``, so the whole set of a tool's data inputs can be
    reasoned about at once. Constructed bare (no tool), it simply delegates
    to the per-parameter matcher classes.
    """

    def __init__(self, trans, tool=None, param_values=None):
        self._trans = trans
        # Retained for whole-tool reasoning; not read in this module yet.
        self._tool = tool
        self._data_inputs = []
        if tool is not None and param_values is not None:
            self._collect_data_inputs(tool, param_values)

    def _collect_data_inputs(self, tool, param_values):
        """Record each data/collection parameter encountered in the tool's inputs."""
        def visitor(input, value, prefix, parent=None, **kwargs):
            # Matching on the type name (instead of isinstance) avoids importing
            # basic.py, which itself imports this module.
            type_name = type(input).__name__
            # NOTE(review): the collection parameter class appears to be named
            # DataCollectionToolParameter elsewhere — confirm the second
            # substring below actually matches it.
            if "DataToolParameter" in type_name or "DatasetCollectionToolParameter" in type_name:
                self._data_inputs.append(input)

        tool.visit_inputs(param_values, visitor)

    def dataset_matcher(self, param, other_values):
        """Return a ``DatasetMatcher`` for ``param`` in the current transaction."""
        return DatasetMatcher(self._trans, param, other_values)

    def dataset_collection_matcher(self, dataset_matcher):
        """Wrap ``dataset_matcher`` in a ``DatasetCollectionMatcher``."""
        return DatasetCollectionMatcher(dataset_matcher)


class DatasetMatcher(object):
""" Utility class to aid DataToolParameter and similar classes in reasoning
about what HDAs could match or are selected for a parameter and value.
Expand Down

0 comments on commit efe5d8b

Please sign in to comment.