Skip to content

Commit

Permalink
Only index changed tools when re-indexing the toolbox
Browse files Browse the repository at this point in the history
This speeds up repeated toolbox reloads enormously (from 1 second on my
test-instance to 0.1 seconds).
  • Loading branch information
mvdbeek committed May 1, 2017
1 parent 43c9c28 commit 34ed9b4
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 41 deletions.
14 changes: 8 additions & 6 deletions lib/galaxy/config.py
Expand Up @@ -913,8 +913,7 @@ def wait_for_toolbox_reload(self, old_toolbox):
# and make sure toolbox has finished reloading)
if self.toolbox.has_reloaded(old_toolbox):
break

time.sleep(1)
time.sleep(0.1)

def _configure_toolbox( self ):
from galaxy import tools
Expand Down Expand Up @@ -948,13 +947,16 @@ def _configure_toolbox( self ):
)
self.container_finder = containers.ContainerFinder(app_info)

def reindex_tool_search( self, toolbox=None ):
def reindex_tool_search( self ):
# Call this when tools are added or removed.
import galaxy.tools.search
index_help = getattr( self.config, "index_tool_help", True )
if not toolbox:
toolbox = self.toolbox
self.toolbox_search = galaxy.tools.search.ToolBoxSearch( toolbox, index_help )
toolbox = self.toolbox
if not hasattr(self, 'toolbox_search'):
self.toolbox_search = galaxy.tools.search.ToolBoxSearch( toolbox, index_help )
else:
self.toolbox_search.update_index(tool_cache=self.tool_cache)
self.tool_cache.reset_status()

def _configure_tool_data_tables( self, from_shed_config ):
from galaxy.tools.data import ToolDataTableManager
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/queue_worker.py
Expand Up @@ -115,7 +115,7 @@ def _get_new_toolbox(app):
load_lib_tools(new_toolbox)
new_toolbox.load_hidden_lib_tool( "galaxy/datatypes/set_metadata_tool.xml" )
[new_toolbox.register_tool(tool) for tool in new_toolbox.data_manager_tools.values()]
app.reindex_tool_search(new_toolbox)
app.reindex_tool_search()
return new_toolbox


Expand Down
77 changes: 46 additions & 31 deletions lib/galaxy/tools/search/__init__.py
Expand Up @@ -57,42 +57,57 @@ def build_index( self, index_help=True ):
writer = self.index.writer()
start_time = datetime.now()
log.debug( 'Starting to build toolbox index.' )
for id, tool in self.toolbox.tools():
# Do not add data managers to the public index
if tool.tool_type == 'manage_data':
continue
add_doc_kwds = {
"id": id,
"description": to_unicode( tool.description ),
"section": to_unicode( tool.get_panel_section()[1] if len( tool.get_panel_section() ) == 2 else '' ),
"help": to_unicode( "" )
}
if tool.name.find( '-' ) != -1:
# Hyphens are wildcards in Whoosh causing bad things
add_doc_kwds['name'] = (' ').join( [ token.text for token in self.rex( to_unicode( tool.name ) ) ] )
else:
add_doc_kwds['name'] = to_unicode( tool.name )
if tool.guid:
# Create a stub consisting of owner, repo, and tool from guid
slash_indexes = [ m.start() for m in re.finditer( '/', tool.guid ) ]
id_stub = tool.guid[ ( slash_indexes[1] + 1 ): slash_indexes[4] ]
add_doc_kwds['stub'] = (' ').join( [ token.text for token in self.rex( to_unicode( id_stub ) ) ] )
else:
add_doc_kwds['stub'] = to_unicode( id )
if tool.labels:
add_doc_kwds['labels'] = to_unicode( " ".join( tool.labels ) )
if index_help and tool.help:
try:
add_doc_kwds['help'] = to_unicode( tool.help.render( host_url="", static_path="" ) )
except Exception:
# Don't fail to build index just because a help message
# won't render.
pass
for tool_id, tool in self.toolbox.tools():
add_doc_kwds = self._create_doc(tool_id=tool_id, tool=tool, index_help=index_help)
writer.add_document( **add_doc_kwds )
writer.commit()
stop_time = datetime.now()
log.debug( 'Toolbox index finished. It took: ' + str(stop_time - start_time) )

def _create_doc(self, tool_id, tool, index_help=True):
    """Build the field values describing `tool` for the search index.

    :param tool_id: id under which the tool is indexed; also used as the
        ``stub`` field when the tool has no guid.
    :param tool: tool object; its ``tool_type``, ``description``, ``name``,
        ``guid``, ``labels`` and ``help`` attributes and its
        ``get_panel_section()`` method are read.
    :param index_help: when true, the rendered help text is included in the
        ``help`` field; otherwise that field stays empty.
    :returns: dict of keyword arguments for the index writer, or an empty
        dict for data manager tools, which must not be indexed.
    """
    # Do not add data managers to the public index
    if tool.tool_type == 'manage_data':
        return {}
    panel_section = tool.get_panel_section()
    add_doc_kwds = {
        "id": tool_id,
        "description": to_unicode(tool.description),
        "section": to_unicode(panel_section[1] if len(panel_section) == 2 else ''),
        "help": to_unicode(""),
    }
    if '-' in tool.name:
        # Hyphens are wildcards in Whoosh causing bad things
        add_doc_kwds['name'] = ' '.join(token.text for token in self.rex(to_unicode(tool.name)))
    else:
        add_doc_kwds['name'] = to_unicode(tool.name)
    if tool.guid:
        # Create a stub consisting of owner, repo, and tool from guid
        slash_indexes = [m.start() for m in re.finditer('/', tool.guid)]
        id_stub = tool.guid[(slash_indexes[1] + 1): slash_indexes[4]]
        add_doc_kwds['stub'] = ' '.join(token.text for token in self.rex(to_unicode(id_stub)))
    else:
        # Use the tool id itself; a bare `id` here would be the builtin
        # function, not the tool's identifier.
        add_doc_kwds['stub'] = to_unicode(tool_id)
    if tool.labels:
        add_doc_kwds['labels'] = to_unicode(" ".join(tool.labels))
    if index_help and tool.help:
        try:
            add_doc_kwds['help'] = to_unicode(tool.help.render(host_url="", static_path=""))
        except Exception:
            # Don't fail to build index just because a help message
            # won't render.
            pass
    return add_doc_kwds

def update_index(self, tool_cache, index_help=True):
    """Use `tool_cache` to determine which tools need indexing and which tools should be expired.

    Ids listed in ``tool_cache._removed_tool_ids`` are deleted from the
    index; ids listed in ``tool_cache._new_tool_ids`` are (re-)added.

    :param tool_cache: object exposing ``_removed_tool_ids``,
        ``_new_tool_ids`` and ``get_tool_by_id(tool_id)``.
    :param index_help: forwarded to ``_create_doc``.
    """
    writer = self.index.writer()
    try:
        for tool_id in tool_cache._removed_tool_ids:
            writer.delete_by_term('id', tool_id)
        for tool_id in tool_cache._new_tool_ids:
            tool = tool_cache.get_tool_by_id(tool_id)
            if tool is None:
                # The tool is no longer in the cache, so there is
                # nothing to index for this id.
                continue
            add_doc_kwds = self._create_doc(tool_id=tool_id, tool=tool, index_help=index_help)
            if not add_doc_kwds:
                # An empty result marks a tool that must stay out of the
                # index; adding it would create an empty document.
                continue
            writer.add_document(**add_doc_kwds)
    except Exception:
        # Release the writer instead of leaving it open and uncommitted.
        writer.cancel()
        raise
    writer.commit()

def search( self, q, tool_name_boost, tool_section_boost, tool_description_boost, tool_label_boost, tool_stub_boost, tool_help_boost, tool_search_limit, tool_enable_ngram_search, tool_ngram_minsize, tool_ngram_maxsize ):
"""
Perform search on the in-memory index. Weight in the given boosts.
Expand Down
17 changes: 15 additions & 2 deletions lib/galaxy/tools/toolbox/cache.py
Expand Up @@ -15,6 +15,8 @@ def __init__(self):
self._tools_by_path = {}
self._tool_paths_by_id = {}
self._mod_time_by_path = {}
self._new_tool_ids = []
self._removed_tool_ids = []

def cleanup(self):
"""
Expand All @@ -36,6 +38,7 @@ def cleanup(self):
# If by chance the file is being removed while calculating the hash or modtime
# we don't want the thread to die.
pass
self._removed_tool_ids.extend(removed_tool_ids)
return removed_tool_ids

def _should_cleanup(self, config_filename):
Expand All @@ -49,10 +52,13 @@ def _should_cleanup(self, config_filename):
return False

def get_tool(self, config_filename):
""" Get the tool from the cache if the tool is up to date.
"""
"""Get the tool at `config_filename` from the cache if the tool is up to date."""
return self._tools_by_path.get(config_filename, None)

def get_tool_by_id(self, tool_id):
    """Look up the config path recorded for `tool_id` and return what `get_tool` yields for it.

    An id with no recorded path is passed on to `get_tool` as ``None``.
    """
    config_filename = self._tool_paths_by_id.get(tool_id)
    return self.get_tool(config_filename)

def expire_tool(self, tool_id):
if tool_id in self._tool_paths_by_id:
config_filename = self._tool_paths_by_id[tool_id]
Expand All @@ -68,6 +74,13 @@ def cache_tool(self, config_filename, tool):
self._mod_time_by_path[config_filename] = os.path.getmtime(config_filename)
self._tool_paths_by_id[tool_id] = config_filename
self._tools_by_path[config_filename] = tool
self._new_tool_ids.append(tool_id)

def reset_status(self):
    """Forget which tool ids were recorded as new or removed.

    Call this once every operation that needs to know about changed tools
    has finished; both tracking lists are replaced by fresh empty lists.
    """
    for tracker in ('_new_tool_ids', '_removed_tool_ids'):
        setattr(self, tracker, [])


class ToolShedRepositoryCache(object):
Expand Down
2 changes: 1 addition & 1 deletion lib/tool_shed/galaxy_install/install_manager.py
Expand Up @@ -609,7 +609,7 @@ def __handle_repository_contents( self, tool_shed_repository, tool_path, reposit
display_path=display_path )
if converter_path:
# Load proprietary datatype converters
self.app.datatypes_registry.load_datatype_converters( self.app.toolbox, installed_repository_dict=repository_dict )
self.app.datatypes_registry.load_datatype_converters( self.app.toolbox, installed_repository_dict=repository_dict, use_cached=True)
if display_path:
# Load proprietary datatype display applications
self.app.datatypes_registry.load_display_applications( self.app, installed_repository_dict=repository_dict )
Expand Down

0 comments on commit 34ed9b4

Please sign in to comment.