/
histories.py
374 lines (316 loc) · 15.2 KB
/
histories.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
"""
Manager and Serializer for histories.
Histories are containers for datasets or dataset collections
created (or copied) by users over the course of an analysis.
"""
from galaxy import model
from galaxy.managers import sharable
from galaxy.managers import deletable
from galaxy.managers import hdas
from galaxy.managers.collections_util import dictify_dataset_collection_instance
import logging
log = logging.getLogger( __name__ )
class HistoryManager( sharable.SharableModelManager, deletable.PurgableManagerMixin ):
    """
    Manager for History models.

    Handles ownership and sharing checks, copying, the (possibly anonymous)
    user's 'current' history, purging, and a few serialization helpers.
    """
    model_class = model.History
    foreign_key_name = 'history'

    user_share_model = model.HistoryUserShareAssociation

    tag_assoc = model.HistoryTagAssociation
    annotation_assoc = model.HistoryAnnotationAssociation
    rating_assoc = model.HistoryRatingAssociation

    # TODO: incorporate imp/exp (or alias to)

    def __init__( self, app, *args, **kwargs ):
        super( HistoryManager, self ).__init__( app, *args, **kwargs )
        # HDAManager is used when purging the datasets contained in a history
        self.hda_manager = hdas.HDAManager( app )

    def copy( self, trans, history, user, **kwargs ):
        """
        Copy and return the given `history`, owned by `user`.

        Extra `kwargs` are passed through to `History.copy`.
        """
        return history.copy( target_user=user, **kwargs )

    # .... sharable
    # overriding to handle anonymous users' current histories in both cases
    def by_user( self, trans, user, **kwargs ):
        """
        Get all the histories for a given user (allowing anon users' theirs)
        ordered by update time.
        """
        # handle default and/or anonymous user (which still may not have a history yet)
        if self.user_manager.is_anonymous( user ):
            current_history = self.get_current( trans )
            return [ current_history ] if current_history else []
        return super( HistoryManager, self ).by_user( trans, user, **kwargs )

    def is_owner( self, trans, history, user ):
        """
        True if the current user is the owner of the given history.
        """
        # anon users are only allowed to view their current history
        if self.user_manager.is_anonymous( user ) and history == self.get_current( trans ):
            return True
        return super( HistoryManager, self ).is_owner( trans, history, user )

    # TODO: possibly to sharable
    def most_recent( self, trans, user, filters=None, **kwargs ):
        """
        Return the most recently update history for the user.

        If user is anonymous, return the current history. If the user is anonymous
        and the current history is deleted, return None.
        """
        # TODO: trans
        if not user:
            current_history = self.get_current( trans )
            return None if ( not current_history or current_history.deleted ) else current_history
        # order newest-first: without .desc() the bare column sorts ascending
        # and .first() would return the *oldest* history
        desc_update_time = self.model_class.table.c.update_time.desc()
        filters = self._munge_filters( filters, self.model_class.user_id == user.id )
        # TODO: normalize this return value
        return self.query( trans, filters=filters, order_by=desc_update_time, limit=1, **kwargs ).first()

    # .... purgable
    def purge( self, trans, history, flush=True, **kwargs ):
        """
        Purge this history and all HDAs, Collections, and Datasets inside this history.

        Raises if the configuration does not allow users to purge datasets.
        """
        # NOTE(review): passes the history (not a dataset) as the item argument —
        # confirm the dataset manager's check ignores the item
        self.hda_manager.dataset_manager.error_unless_dataset_purge_allowed( trans, history )
        # First purge all the datasets
        for hda in history.datasets:
            if not hda.purged:
                self.hda_manager.purge( trans, hda, flush=True )
        # Now mark the history as purged
        super( HistoryManager, self ).purge( trans, history, flush=flush, **kwargs )

    # .... current
    def get_current( self, trans ):
        """
        Return the current history.
        """
        # TODO: trans
        return trans.get_history()

    def set_current( self, trans, history ):
        """
        Set the current history, returning it.
        """
        # TODO: trans
        trans.set_history( history )
        return history

    def set_current_by_id( self, trans, history_id ):
        """
        Set the current history by an id.
        """
        return self.set_current( trans, self.by_id( trans, history_id ) )

    # .... serialization
    # TODO: move to serializer (i.e. history with contents attr)
    def _get_history_data( self, trans, history ):
        """
        Returns a dictionary containing ``history`` and ``contents``, serialized
        history and an array of serialized history contents respectively.

        On error, the returned history dictionary carries an ``error`` message
        instead of raising.
        """
        # TODO: instantiate here? really?
        history_serializer = HistorySerializer( self.app )
        hda_serializer = hdas.HDASerializer( self.app )
        history_dictionary = {}
        contents_dictionaries = []
        try:
            history_dictionary = history_serializer.serialize_to_view( trans, history, view='detailed' )

            for content in history.contents_iter( types=[ 'dataset', 'dataset_collection' ] ):
                contents_dict = {}
                if isinstance( content, model.HistoryDatasetAssociation ):
                    contents_dict = hda_serializer.serialize_to_view( trans, content, view='detailed' )
                elif isinstance( content, model.HistoryDatasetCollectionAssociation ):
                    try:
                        service = self.app.dataset_collections_service
                        dataset_collection_instance = service.get_dataset_collection_instance(
                            trans=trans,
                            instance_type='history',
                            id=self.app.security.encode_id( content.id ),
                        )
                        contents_dict = dictify_dataset_collection_instance( dataset_collection_instance,
                                                                             security=self.app.security,
                                                                             parent=dataset_collection_instance.history,
                                                                             view="element" )
                    # a failed collection listing only blanks that entry, not the whole response
                    except Exception as exc:
                        log.exception( "Error in history API at listing dataset collection: %s", exc )
                        # TODO: return some dict with the error
                contents_dictionaries.append( contents_dict )

        except Exception as exc:
            user_id = str( trans.user.id ) if trans.user else '(anonymous)'
            log.exception( 'Error bootstrapping history for user %s: %s', user_id, str( exc ) )
            message = ( 'An error occurred getting the history data from the server. '
                        'Please contact a Galaxy administrator if the problem persists.' )
            history_dictionary[ 'error' ] = message

        return { 'history': history_dictionary,
                 'contents': contents_dictionaries }

    # remove this
    def get_state_counts( self, trans, history, exclude_deleted=True, exclude_hidden=False ):
        """
        Return a dictionary keyed to possible dataset states and valued with the number
        of datasets in this history that have those states.
        """
        # TODO: the default flags above may not make a lot of sense (T,T?)
        state_counts = {}
        for state in model.Dataset.states.values():
            state_counts[ state ] = 0

        # TODO:?? collections and coll. states?
        for hda in history.datasets:
            if exclude_deleted and hda.deleted:
                continue
            if exclude_hidden and not hda.visible:
                continue
            state_counts[ hda.state ] = state_counts[ hda.state ] + 1
        return state_counts

    # remove this
    def get_state_ids( self, trans, history ):
        """
        Return a dictionary keyed to possible dataset states and valued with lists
        containing the ids of each HDA in that state.
        """
        state_ids = {}
        for state in model.Dataset.states.values():
            state_ids[ state ] = []

        # TODO:?? collections and coll. states?
        for hda in history.datasets:
            # TODO: do not encode ids at this layer
            encoded_id = self.app.security.encode_id( hda.id )
            state_ids[ hda.state ].append( encoded_id )
        return state_ids

    # TODO: remove this (is state used/useful?)
    def get_history_state( self, trans, history ):
        """
        Returns the history state based on the states of the HDAs it contains.
        """
        states = model.Dataset.states

        # (default to ERROR)
        state = states.ERROR

        # TODO: history_state and state_counts are classically calc'd at the same time
        #   so this is rel. ineff. - if we keep this...
        hda_state_counts = self.get_state_counts( trans, history, exclude_deleted=False )
        num_hdas = sum( hda_state_counts.values() )
        if num_hdas == 0:
            state = states.NEW

        else:
            if ( hda_state_counts[ states.RUNNING ] > 0
                 or hda_state_counts[ states.SETTING_METADATA ] > 0
                 or hda_state_counts[ states.UPLOAD ] > 0 ):
                state = states.RUNNING
            # TODO: this method may be more useful if we *also* polled the histories jobs here too

            elif hda_state_counts[ states.QUEUED ] > 0:
                state = states.QUEUED

            elif ( hda_state_counts[ states.ERROR ] > 0
                   or hda_state_counts[ states.FAILED_METADATA ] > 0 ):
                state = states.ERROR

            elif hda_state_counts[ states.OK ] == num_hdas:
                state = states.OK

        return state
class HistorySerializer( sharable.SharableModelSerializer, deletable.PurgableSerializerMixin ):
    """
    Interface/service object for serializing histories into dictionaries.
    """
    SINGLE_CHAR_ABBR = 'h'

    def __init__( self, app ):
        super( HistorySerializer, self ).__init__( app )

        # helpers used by several of the serializers registered below
        self.history_manager = HistoryManager( app )
        self.hda_manager = hdas.HDAManager( app )
        self.hda_serializer = hdas.HDASerializer( app )

        self.default_view = 'summary'
        self.add_view( 'summary', [
            'id',
            'model_class',
            'name',
            'deleted',
            'purged',
            # 'count'
            'url',
            # TODO: why these?
            'published',
            'annotation',
            'tags',
        ])
        self.add_view( 'detailed', [
            'contents_url',
            # 'hdas',
            'empty',
            'size', 'nice_size',
            'user_id',
            'create_time', 'update_time',
            'importable', 'slug', 'username_and_slug',
            'genome_build',
            # TODO: remove the next three - instead getting the same info from the 'hdas' list
            'state',
            'state_details',
            'state_ids',
        # in the Historys' case, each of these views includes the keys from the previous
        ], include_keys_from='summary' )

    # assumes: outgoing to json.dumps and sanitized
    def add_serializers( self ):
        """
        Register the key -> serializer-fn map for histories.
        """
        super( HistorySerializer, self ).add_serializers()
        deletable.PurgableSerializerMixin.add_serializers( self )

        self.serializers.update({
            'model_class'   : lambda *a: 'History',
            'id'            : self.serialize_id,
            'create_time'   : self.serialize_date,
            'update_time'   : self.serialize_date,
            'size'          : lambda t, i, k: int( i.get_disk_size() ),
            'nice_size'     : lambda t, i, k: i.get_disk_size( nice_size=True ),
            'state'         : lambda t, i, k: self.history_manager.get_history_state( t, i ),
            'url'           : lambda t, i, k: self.url_for( 'history', id=t.security.encode_id( i.id ) ),
            'contents_url'  : lambda t, i, k: self.url_for( 'history_contents',
                                                            history_id=t.security.encode_id( i.id ) ),
            'empty'         : lambda t, i, k: ( len( i.datasets ) + len( i.dataset_collections ) ) <= 0,
            'count'         : lambda trans, item, key: len( item.datasets ),
            'hdas'          : lambda t, i, k: [ t.security.encode_id( hda.id ) for hda in i.datasets ],
            'state_details' : lambda t, i, k: self.history_manager.get_state_counts( t, i ),
            'state_ids'     : lambda t, i, k: self.history_manager.get_state_ids( t, i ),
            'contents'      : self.serialize_contents
        })

    def serialize_contents( self, trans, history, *args ):
        """
        Return a list of serialized dictionaries, one for each HDA and
        dataset collection in `history` (in contents order).
        """
        contents_dictionaries = []
        for content in history.contents_iter( types=[ 'dataset', 'dataset_collection' ] ):
            contents_dict = {}
            if isinstance( content, model.HistoryDatasetAssociation ):
                contents_dict = self.hda_serializer.serialize_to_view( trans, content, view='detailed' )
            elif isinstance( content, model.HistoryDatasetCollectionAssociation ):
                contents_dict = self.serialize_collection( trans, content )
            contents_dictionaries.append( contents_dict )
        return contents_dictionaries

    def serialize_collection( self, trans, collection ):
        """
        Return a dictionary representation of the given collection (element view).
        """
        service = self.app.dataset_collections_service
        dataset_collection_instance = service.get_dataset_collection_instance(
            trans=trans,
            instance_type='history',
            # was `self.security` - use `self.app.security` as every other
            # encode_id call site in this module does
            id=self.app.security.encode_id( collection.id ),
        )
        return dictify_dataset_collection_instance( dataset_collection_instance,
                                                    security=self.app.security,
                                                    parent=dataset_collection_instance.history,
                                                    view="element" )
class HistoryDeserializer( sharable.SharableModelDeserializer, deletable.PurgableDeserializerMixin ):
    """
    Interface/service object for validating and deserializing dictionaries into histories.
    """
    model_manager_class = HistoryManager

    def __init__( self, app ):
        super( HistoryDeserializer, self ).__init__( app )
        # alias the generic manager under a history-specific name
        self.history_manager = self.manager

    def add_deserializers( self ):
        """
        Register the incoming-key -> deserializer-fn map for histories.
        """
        super( HistoryDeserializer, self ).add_deserializers()
        deletable.PurgableDeserializerMixin.add_deserializers( self )

        history_deserializers = {
            'name'         : self.deserialize_basestring,
            'genome_build' : self.deserialize_genome_build,
        }
        self.deserializers.update( history_deserializers )
class HistoryFilters( sharable.SharableModelFilters, deletable.PurgableFiltersMixin ):
    """
    Parsers for query-string filters applicable to History queries.
    """
    model_class = model.History

    def _add_parsers( self ):
        super( HistoryFilters, self )._add_parsers()
        deletable.PurgableFiltersMixin._add_parsers( self )

        # history specific ORM-level filters: simple string matching on columns
        self.orm_filter_parsers.update({
            'name'         : { 'op': ( 'eq', 'contains', 'like' ) },
            'genome_build' : { 'op': ( 'eq', 'contains', 'like' ) },
        })

        # TODO: I'm not entirely convinced this (or tags) are a good idea for filters since they involve a/the user
        # function-based filters (applied in python, not in the query)
        annotation_parser = { 'op': { 'has': self.filter_annotation_contains, } }
        tag_parser = {
            'op': {
                'eq'  : self.filter_has_tag,
                'has' : self.filter_has_partial_tag,
            }
        }
        self.fn_filter_parsers.update({
            # TODO: add this in annotatable mixin
            'annotation' : annotation_parser,
            # TODO: add this in taggable mixin
            'tag'        : tag_parser,
        })