-
Notifications
You must be signed in to change notification settings - Fork 967
/
common_util.py
382 lines (337 loc) · 20 KB
/
common_util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
import json
import logging
import os
import urllib
import urllib2
from galaxy import util
from galaxy.util.odict import odict
from galaxy.web import url_for
from tool_shed.util import encoding_util, xml_util
# Module-level logger, named after this module.
log = logging.getLogger( __name__ )
# Owner name that check_for_missing_tools() passes to the Tool Shed when it looks up the
# repositories listed in a tool migration script.
REPOSITORY_OWNER = 'devteam'
def accumulate_tool_dependencies( tool_shed_accessible, tool_dependencies, all_tool_dependencies ):
    """
    Merge tool_dependencies into all_tool_dependencies, skipping entries that are already present.

    Nothing is merged unless tool_shed_accessible is true and tool_dependencies is non-empty.  The
    received all_tool_dependencies list is extended in place and is also the return value.
    """
    if not ( tool_shed_accessible and tool_dependencies ):
        return all_tool_dependencies
    for candidate in tool_dependencies:
        if candidate in all_tool_dependencies:
            continue
        all_tool_dependencies.append( candidate )
    return all_tool_dependencies
def check_for_missing_tools( app, tool_panel_configs, latest_tool_migration_script_number ):
    """
    Find the tools named in a tool migration script that are still referenced by local tool panel configs.

    The migration script scripts/migrate_tools/<NNNN>_tools.xml is parsed, and for every <repository>
    tag set the Tool Shed is asked for the repository's dependencies.  Each <tool file="..."> listed for
    the repository is recorded together with the accumulated tool dependencies.  The received
    tool_panel_configs are then scanned and every <tool> entry (top level or inside a <section>) that
    matches a recorded tool is reported.

    Returns a tuple ( tool_shed_accessible, missing_tool_configs_dict ).  ( False, odict() ) is returned
    when the migration script cannot be parsed.  An Exception is raised when the tool shed named in the
    migration script has no entry in the tool shed registry.
    """
    # Get the 000x_tools.xml file associated with the current migrate_tools version number.
    tools_xml_file_path = os.path.abspath( os.path.join( 'scripts', 'migrate_tools', '%04d_tools.xml' % latest_tool_migration_script_number ) )
    # Parse the XML and load the file attributes for later checking against the proprietary tool_panel_config.
    migrated_tool_configs_dict = odict()
    tree, error_message = xml_util.parse_xml( tools_xml_file_path )
    if tree is None:
        return False, odict()
    root = tree.getroot()
    tool_shed = root.get( 'name' )
    tool_shed_url = get_tool_shed_url_from_tool_shed_registry( app, tool_shed )
    if not tool_shed_url:
        exception_msg = '\n\nThe entry for the main Galaxy tool shed at %s is missing from the %s file.  ' % ( tool_shed, app.config.tool_sheds_config )
        exception_msg += 'The entry for this tool shed must always be available in this file, so re-add it before attempting to start your Galaxy server.\n'
        raise Exception( exception_msg )
    # The default behavior is that the tool shed is down.
    tool_shed_accessible = False
    missing_tool_configs_dict = odict()
    for elem in root:
        if elem.tag != 'repository':
            continue
        repository_dependencies = []
        all_tool_dependencies = []
        repository_name = elem.get( 'name' )
        changeset_revision = elem.get( 'changeset_revision' )
        tool_shed_accessible, repository_dependencies_dict = get_repository_dependencies( app,
                                                                                          tool_shed_url,
                                                                                          repository_name,
                                                                                          REPOSITORY_OWNER,
                                                                                          changeset_revision )
        if not tool_shed_accessible:
            # The tool shed cannot be reached, so there is no point in inspecting further repositories.
            break
        # Accumulate all tool dependencies defined for repository dependencies for display to the user.
        for rd_key, rd_tups in repository_dependencies_dict.items():
            if rd_key in [ 'root_key', 'description' ]:
                continue
            for rd_tup in rd_tups:
                # Use dedicated names for the dependency's values so that the migrated repository's own
                # changeset_revision (needed below) and tool_shed are not overwritten.
                rd_name, rd_owner, rd_changeset_revision = parse_repository_dependency_tuple( rd_tup )[ 1:4 ]
                tool_shed_accessible, tool_dependencies = get_tool_dependencies( app,
                                                                                 tool_shed_url,
                                                                                 rd_name,
                                                                                 rd_owner,
                                                                                 rd_changeset_revision )
                all_tool_dependencies = accumulate_tool_dependencies( tool_shed_accessible, tool_dependencies, all_tool_dependencies )
        # Now add the tool dependencies defined by the migrated repository itself.
        tool_shed_accessible, tool_dependencies = get_tool_dependencies( app,
                                                                         tool_shed_url,
                                                                         repository_name,
                                                                         REPOSITORY_OWNER,
                                                                         changeset_revision )
        all_tool_dependencies = accumulate_tool_dependencies( tool_shed_accessible, tool_dependencies, all_tool_dependencies )
        for tool_elem in elem.findall( 'tool' ):
            tool_config_file_name = tool_elem.get( 'file' )
            if tool_config_file_name:
                # We currently do nothing with repository dependencies except install them (we do not display repositories that will be
                # installed to the user).  However, we'll store them in the following dictionary in case we choose to display them in the
                # future.
                dependencies_dict = dict( tool_dependencies=all_tool_dependencies,
                                          repository_dependencies=repository_dependencies )
                migrated_tool_configs_dict[ tool_config_file_name ] = dependencies_dict
    if tool_shed_accessible:
        # Parse the proprietary tool_panel_configs (the default is tool_conf.xml) and generate the list of missing tool config file names.
        for tool_panel_config in tool_panel_configs:
            panel_tree, error_message = xml_util.parse_xml( tool_panel_config )
            if not panel_tree:
                continue
            for panel_elem in panel_tree.getroot():
                if panel_elem.tag == 'tool':
                    missing_tool_configs_dict = check_tool_tag_set( panel_elem, migrated_tool_configs_dict, missing_tool_configs_dict )
                elif panel_elem.tag == 'section':
                    for section_elem in panel_elem:
                        if section_elem.tag == 'tool':
                            missing_tool_configs_dict = check_tool_tag_set( section_elem, migrated_tool_configs_dict, missing_tool_configs_dict )
    return tool_shed_accessible, missing_tool_configs_dict
def check_tool_tag_set( elem, migrated_tool_configs_dict, missing_tool_configs_dict ):
    """
    Record the tool referenced by elem in missing_tool_configs_dict if it is a migrated tool.

    The 'file' attribute of elem is compared, both as given and reduced to its file name, against the
    keys of migrated_tool_configs_dict.  On a match the migrated entry is stored in
    missing_tool_configs_dict under the file name.  The (possibly updated) missing_tool_configs_dict
    is returned.
    """
    file_path = elem.get( 'file', None )
    if not file_path:
        return missing_tool_configs_dict
    name = os.path.basename( file_path )
    for migrated_tool_config in migrated_tool_configs_dict.keys():
        if migrated_tool_config == file_path or migrated_tool_config == name:
            missing_tool_configs_dict[ name ] = migrated_tool_configs_dict[ migrated_tool_config ]
    return missing_tool_configs_dict
def generate_clone_url_for_installed_repository( app, repository ):
    """Generate the URL for cloning a repository that has been installed into a Galaxy instance."""
    # Resolve the repository's tool shed to the URL configured in the tool shed registry.
    registered_url = get_tool_shed_url_from_tool_shed_registry( app, str( repository.tool_shed ) )
    clone_pathspec = [ 'repos', str( repository.owner ), str( repository.name ) ]
    return url_join( registered_url, pathspec=clone_pathspec )
def generate_clone_url_for_repository_in_tool_shed( user, repository ):
    """
    Generate the URL for cloning a repository that is in the tool shed.

    When a user is received, the user's username is embedded in the URL ahead of the host
    ( protocol://username@host/repos/owner/name ).
    """
    base_url = url_for( '/', qualified=True ).rstrip( '/' )
    if not user:
        return '%s/repos/%s/%s' % ( base_url, repository.user.username, repository.name )
    protocol, base = base_url.split( '://' )
    username = '%s@' % user.username
    return '%s://%s%s/repos/%s/%s' % ( protocol, username, base, repository.user.username, repository.name )
def generate_clone_url_from_repo_info_tup( app, repo_info_tup ):
    """Generate the URL for cloning a repository given a tuple of toolshed, name, owner, changeset_revision."""
    # Example tuple: ['http://localhost:9009', 'blast_datatypes', 'test', '461a4216e8ab', False]
    parsed_tup = parse_repository_dependency_tuple( repo_info_tup )
    # Only the tool shed, name and owner are needed; the changeset_revision is not included in clone urls.
    toolshed, name, owner = parsed_tup[ :3 ]
    tool_shed_url = get_tool_shed_url_from_tool_shed_registry( app, toolshed )
    return url_join( tool_shed_url, pathspec=[ 'repos', owner, name ] )
def get_non_shed_tool_panel_configs( app ):
    """Get the non-shed related tool panel configs - there can be more than one, and the default is tool_conf.xml."""
    non_shed_configs = []
    for config_filename in app.config.tool_configs:
        tree, error_message = xml_util.parse_xml( config_filename )
        # Files that cannot be parsed are skipped.  Any config file that includes a tool_path attribute
        # in the root tag set like the following is shed-related and is skipped as well.
        # <toolbox tool_path="../shed_tools">
        if tree is not None and tree.getroot().get( 'tool_path', None ) is None:
            non_shed_configs.append( config_filename )
    return non_shed_configs
def get_repository_dependencies( app, tool_shed_url, repository_name, repository_owner, changeset_revision ):
    """
    Ask the tool shed for the repository dependencies defined for a specific repository revision.

    Returns a tuple ( tool_shed_accessible, repository_dependencies_dict ).  tool_shed_accessible is
    False when the request raised an exception (the exception is logged as a warning and not
    propagated).  The dictionary is empty when the request failed or the response carried no payload.
    """
    repository_dependencies_dict = {}
    params = dict( name=repository_name, owner=repository_owner, changeset_revision=changeset_revision )
    pathspec = [ 'repository', 'get_repository_dependencies' ]
    try:
        raw_text = tool_shed_get( app, tool_shed_url, pathspec=pathspec, params=params )
    except Exception as e:
        log.warning( "The URL\n%s\nraised the exception:\n%s\n", url_join( tool_shed_url, pathspec=pathspec, params=params ), str( e ) )
        return False, repository_dependencies_dict
    # NOTE(review): responses of two characters or fewer are treated as "no dependencies" - presumably
    # an empty JSON value; confirm against the tool shed controller.
    if len( raw_text ) > 2:
        encoded_text = json.loads( raw_text )
        repository_dependencies_dict = encoding_util.tool_shed_decode( encoded_text )
    return True, repository_dependencies_dict
def get_protocol_from_tool_shed_url( tool_shed_url ):
    """
    Return the lower-cased protocol from the received tool_shed_url if it exists.

    'http' is returned when the URL carries no protocol, when it is None, or when it cannot be
    inspected (in which case the problem is logged).
    """
    # We receive a lot of calls here where the tool_shed_url is None.  The container_util uses
    # that value when creating a header row.
    if tool_shed_url is None:
        return 'http'
    try:
        if tool_shed_url.find( '://' ) > 0:
            return tool_shed_url.split( '://' )[0].lower()
    except Exception as e:
        # The tool_shed_url is not None, so we have a problem.
        log.exception( "Handled exception getting the protocol from Tool Shed URL %s:\n%s", tool_shed_url, e )
    # Default to HTTP protocol.
    return 'http'
def get_tool_dependencies( app, tool_shed_url, repository_name, repository_owner, changeset_revision ):
    """
    Ask the tool shed for the tool dependencies defined for a specific repository revision.

    Returns a tuple ( tool_shed_accessible, tool_dependencies ) where tool_dependencies is a list of
    ( name, version, type ) tuples.  tool_shed_accessible is False when the request raised an
    exception (the exception is logged as a warning and not propagated); the list is then empty.
    """
    tool_dependencies = []
    params = dict( name=repository_name, owner=repository_owner, changeset_revision=changeset_revision )
    pathspec = [ 'repository', 'get_tool_dependencies' ]
    try:
        text = tool_shed_get( app, tool_shed_url, pathspec=pathspec, params=params )
    except Exception as e:
        log.warning( "The URL\n%s\nraised the exception:\n%s\n", url_join( tool_shed_url, pathspec=pathspec, params=params ), str( e ) )
        return False, tool_dependencies
    if text:
        tool_dependencies_dict = encoding_util.tool_shed_decode( text )
        # Only the requirement dictionaries are needed; the keys they are stored under are not used.
        for requirements_dict in tool_dependencies_dict.values():
            tool_dependency_name = requirements_dict[ 'name' ]
            tool_dependency_version = requirements_dict[ 'version' ]
            tool_dependency_type = requirements_dict[ 'type' ]
            tool_dependencies.append( ( tool_dependency_name, tool_dependency_version, tool_dependency_type ) )
    return True, tool_dependencies
def get_tool_shed_repository_ids( as_string=False, **kwd ):
    """
    Collect tool shed repository ids from the received keyword arguments.

    Ids are read from 'tool_shed_repository_ids', falling back to 'id' when that yields nothing.  A
    single 'tool_shed_repository_id' is appended if it is not already included.  'ordered_tsr_ids' is
    consulted only if util.listify() returned None for the previous keys.

    Returns the ids as a list, or as a comma-separated string when as_string is true.  When no list
    could be built the result is [] (or '' when as_string is true).
    """
    tsrid = kwd.get( 'tool_shed_repository_id', None )
    tsridslist = util.listify( kwd.get( 'tool_shed_repository_ids', None ) )
    if not tsridslist:
        tsridslist = util.listify( kwd.get( 'id', None ) )
    if tsridslist is not None:
        if tsrid is not None and tsrid not in tsridslist:
            tsridslist.append( tsrid )
    else:
        tsridslist = util.listify( kwd.get( 'ordered_tsr_ids', None ) )
    if tsridslist is not None:
        if as_string:
            return ','.join( tsridslist )
        return tsridslist
    # Nothing was found.  Honor as_string here too so that callers always receive the type they asked for.
    if as_string:
        return ''
    return []
def get_tool_shed_url_from_tool_shed_registry( app, tool_shed ):
    """
    The value of tool_shed is something like: toolshed.g2.bx.psu.edu.  We need the URL to this tool shed, which is
    something like: http://toolshed.g2.bx.psu.edu/

    The first registered tool shed URL that contains tool_shed (with any protocol removed) is returned without
    trailing slashes.  None is returned when no registered URL matches.
    """
    cleaned_tool_shed = remove_protocol_from_tool_shed_url( tool_shed )
    for registered_url in app.tool_shed_registry.tool_sheds.values():
        if cleaned_tool_shed in registered_url:
            return registered_url.rstrip( '/' )
    # The tool shed from which the repository was originally installed must no longer be configured in tool_sheds_conf.xml.
    return None
def handle_galaxy_url( trans, **kwd ):
    """
    Return the Galaxy URL for the current request.

    A 'galaxy_url' keyword argument, when given, is stored in the 'toolshedgalaxyurl' cookie and
    returned.  Otherwise the value of that cookie is returned.
    """
    galaxy_url = kwd.get( 'galaxy_url', None )
    if not galaxy_url:
        return trans.get_cookie( name='toolshedgalaxyurl' )
    trans.set_cookie( galaxy_url, name='toolshedgalaxyurl' )
    return galaxy_url
def handle_tool_shed_url_protocol( app, shed_url ):
    """
    Handle secure and insecure HTTP protocol since they may change over time.

    For the 'galaxy' application the received shed_url is stripped of its protocol and looked up in the
    tool shed registry, so the currently configured URL is returned.  For any other application the
    application's own qualified base URL is returned.  If anything fails, shed_url is returned unchanged.
    """
    try:
        if app.name == 'galaxy':
            url = remove_protocol_from_tool_shed_url( shed_url )
            tool_shed_url = get_tool_shed_url_from_tool_shed_registry( app, url )
        else:
            tool_shed_url = str( url_for( '/', qualified=True ) ).rstrip( '/' )
        return tool_shed_url
    except Exception as e:
        # We receive a lot of calls here where the shed_url is None.  The container_util uses
        # that value when creating a header row.  If the shed_url is not None, we have a problem.
        if shed_url is not None:
            log.exception( "Handled exception removing protocol from URL %s:\n%s", shed_url, e )
        return shed_url
def parse_repository_dependency_tuple( repository_dependency_tuple, contains_error=False ):
    """
    Normalize a repository dependency tuple to its full length.

    The received sequence holds tool_shed, name, owner and changeset_revision, optionally followed by
    prior_installation_required and only_if_compiling_contained_td.  When contains_error is true the
    last item of the sequence is an error value.

    Returns ( tool_shed, name, owner, changeset_revision, prior_installation_required,
    only_if_compiling_contained_td ), with error appended as a seventh item when contains_error is true.
    Raises ValueError when the sequence has an unsupported number of items.
    """
    values = list( repository_dependency_tuple )
    if contains_error:
        # The error is always the last item; what precedes it has the same layout as an error-free tuple.
        if not values:
            raise ValueError( 'Invalid repository dependency tuple (no items): %s' % str( repository_dependency_tuple ) )
        error = values.pop()
    if not 4 <= len( values ) <= 6:
        raise ValueError( 'Invalid repository dependency tuple (unsupported number of items): %s' % str( repository_dependency_tuple ) )
    tool_shed, name, owner, changeset_revision = values[ :4 ]
    # Default both prior_installation_required and only_if_compiling_contained_td to False in cases where metadata should be reset on the
    # repository containing the repository_dependency definition.
    prior_installation_required = values[ 4 ] if len( values ) > 4 else 'False'
    only_if_compiling_contained_td = values[ 5 ] if len( values ) > 5 else 'False'
    if contains_error:
        return tool_shed, name, owner, changeset_revision, prior_installation_required, only_if_compiling_contained_td, error
    return tool_shed, name, owner, changeset_revision, prior_installation_required, only_if_compiling_contained_td
def remove_port_from_tool_shed_url( tool_shed_url ):
    """
    Return a partial Tool Shed URL, eliminating the port if it exists.

    Everything from the first ':' onward is dropped, so the received value is expected to carry no
    protocol.  Trailing slashes are removed.  None is returned for None, and a value that cannot be
    processed is logged and returned unchanged.
    """
    # We receive a lot of calls here where the tool_shed_url is None.  The container_util uses
    # that value when creating a header row.
    if tool_shed_url is None:
        return tool_shed_url
    try:
        if tool_shed_url.find( ':' ) > 0:
            # Eliminate the port, if any, since it will result in an invalid directory name.
            new_tool_shed_url = tool_shed_url.split( ':' )[ 0 ]
        else:
            new_tool_shed_url = tool_shed_url
        return new_tool_shed_url.rstrip( '/' )
    except Exception as e:
        # The tool_shed_url is not None, so we have a problem.
        log.exception( "Handled exception removing the port from Tool Shed URL %s:\n%s", tool_shed_url, e )
        return tool_shed_url
def remove_protocol_and_port_from_tool_shed_url( tool_shed_url ):
    """Return a partial Tool Shed URL, eliminating the protocol and/or port if either exists."""
    # The protocol is removed first, then the port is removed from what is left.
    return remove_port_from_tool_shed_url( remove_protocol_from_tool_shed_url( tool_shed_url ) )
def remove_protocol_and_user_from_clone_url( repository_clone_url ):
    """Return a URL that can be used to clone a repository, eliminating the protocol and user if either exists."""
    # The separators are tried in order.  An '@' means the url includes an authenticated user,
    # something like: http://test@bx.psu.edu:9009/repos/some_username/column
    # Otherwise a '//' means the url includes only a protocol, something like:
    # http://bx.psu.edu:9009/repos/some_username/column
    for separator in ( '@', '//' ):
        if repository_clone_url.find( separator ) > 0:
            return repository_clone_url.split( separator )[ 1 ].rstrip( '/' )
    # Neither a user nor a protocol is present.
    return repository_clone_url.rstrip( '/' )
def remove_protocol_from_tool_shed_url( tool_shed_url ):
    """
    Return a partial Tool Shed URL, eliminating the protocol if it exists.

    Trailing slashes are removed as well.  None is returned for None, and a value that cannot be
    processed is logged and returned unchanged.
    """
    # We receive a lot of calls here where the tool_shed_url is None.  The container_util uses
    # that value when creating a header row.
    if tool_shed_url is None:
        return tool_shed_url
    try:
        if tool_shed_url.find( '://' ) > 0:
            new_tool_shed_url = tool_shed_url.split( '://' )[1]
        else:
            new_tool_shed_url = tool_shed_url
        return new_tool_shed_url.rstrip( '/' )
    except Exception as e:
        # The tool_shed_url is not None, so we have a problem.
        log.exception( "Handled exception removing the protocol from Tool Shed URL %s:\n%s", tool_shed_url, e )
        return tool_shed_url
def tool_shed_get( app, base_url, pathspec=[], params={} ):
    """
    Make contact with the tool shed via the uri provided.

    The request URL is built from base_url, pathspec and params with url_join().  If the tool shed
    registry supplies a password manager for base_url, HTTP basic authentication is enabled.  The
    response body is returned; exceptions raised while opening or reading the URL propagate to the caller.

    The default pathspec and params values are never modified here.
    """
    registry = app.tool_shed_registry
    # urllib2 auto-detects system proxies, when passed a Proxyhandler.
    # Refer: https://docs.python.org/2/howto/urllib2.html#proxies
    proxy = urllib2.ProxyHandler()
    urlopener = urllib2.build_opener( proxy )
    urllib2.install_opener( urlopener )
    password_mgr = registry.password_manager_for_url( base_url )
    if password_mgr is not None:
        auth_handler = urllib2.HTTPBasicAuthHandler( password_mgr )
        urlopener.add_handler( auth_handler )
    full_url = url_join( base_url, pathspec=pathspec, params=params )
    response = urlopener.open( full_url )
    try:
        return response.read()
    finally:
        # Release the connection even if reading the response fails.
        response.close()
def url_join( base_url, pathspec=None, params=None ):
    """
    Return a valid URL produced by appending a base URL and a set of request parameters.

    Trailing slashes are stripped from base_url.  pathspec may be a string or a sequence of path
    segments which are joined with '/'.  params, when not None, is URL-encoded and appended after '?'.
    """
    url = base_url.rstrip( '/' )
    if pathspec is not None:
        path = pathspec if isinstance( pathspec, basestring ) else '/'.join( pathspec )
        url = '%s/%s' % ( url, path )
    if params is None:
        return url
    return '%s?%s' % ( url, urllib.urlencode( params ) )