Merge remote-tracking branch 'upstream/dev' into api.batch
carlfeberhard committed Feb 17, 2016
2 parents 69de8d5 + 3ee0594 commit 50b065a
Showing 67 changed files with 2,539 additions and 2,097 deletions.
12 changes: 0 additions & 12 deletions .ci/flake8_blacklist.txt
@@ -13,15 +13,3 @@ scripts/scramble/
scripts/tool_shed/
scripts/tools/
scripts/transfer.py
tools/data_source/
tools/filters/
tools/genomespace/
tools/meme/
tools/metag_tools/
tools/phenotype_association/
tools/plotting/
tools/solid_tools/
tools/sr_assembly/
tools/sr_mapping/
tools/validation/
tools/visualization/
1 change: 0 additions & 1 deletion config/tool_conf.xml.sample
@@ -26,7 +26,6 @@
<tool file="data_source/hbvar.xml" />
<tool file="genomespace/genomespace_file_browser_prod.xml" />
<tool file="genomespace/genomespace_importer.xml" />
<tool file="validation/fix_errors.xml" />
</section>
<section id="send" name="Send Data">
<tool file="genomespace/genomespace_exporter.xml" />
37 changes: 34 additions & 3 deletions lib/galaxy/datatypes/text.py
@@ -355,12 +355,28 @@ class SnpEffDb( Text ):
"""Class describing a SnpEff genome build"""
file_ext = "snpeffdb"
MetadataElement( name="genome_version", default=None, desc="Genome Version", readonly=True, visible=True, no_value=None )
MetadataElement( name="snpeff_version", default="SnpEff4.0", desc="SnpEff Version", readonly=True, visible=True, no_value=None )
MetadataElement( name="regulation", default=[], desc="Regulation Names", readonly=True, visible=True, no_value=[], optional=True)
MetadataElement( name="annotation", default=[], desc="Annotation Names", readonly=True, visible=True, no_value=[], optional=True)

def __init__( self, **kwd ):
Text.__init__( self, **kwd )

    # The SnpEff version line was added in SnpEff version 4.1
    def getSnpeffVersionFromFile(self, path):
        snpeff_version = None
        try:
            fh = gzip.open(path, 'rb')
            buf = fh.read(100)
            fh.close()
            lines = buf.splitlines()
            m = re.match(r'^(SnpEff)\s+(\d+\.\d+).*$', lines[0].strip())
            if m:
                snpeff_version = m.groups()[0] + m.groups()[1]
        except Exception:
            pass
        return snpeff_version

def set_meta( self, dataset, **kwd ):
Text.set_meta(self, dataset, **kwd )
data_dir = dataset.extra_files_path
@@ -370,13 +386,19 @@ def set_meta( self, dataset, **kwd ):
annotations_dict = {'nextProt.bin' : '-nextprot', 'motif.bin': '-motif'}
regulations = []
annotations = []
genome_version = None
snpeff_version = None
if data_dir and os.path.isdir(data_dir):
for root, dirs, files in os.walk(data_dir):
for fname in files:
if fname.startswith('snpEffectPredictor'):
# if snpEffectPredictor.bin download succeeded
genome_version = os.path.basename(root)
dataset.metadata.genome_version = genome_version
# read the first line of the gzipped snpEffectPredictor.bin file to get the SnpEff version
snpeff_version = self.getSnpeffVersionFromFile(os.path.join(root, fname))
if snpeff_version:
dataset.metadata.snpeff_version = snpeff_version
else:
m = re.match(regulation_pattern, fname)
if m:
@@ -390,7 +412,8 @@ def set_meta( self, dataset, **kwd ):
dataset.metadata.annotation = annotations
try:
fh = file(dataset.file_name, 'w')
fh.write("%s\n" % genome_version)
fh.write("%s\n" % genome_version if genome_version else 'Genome unknown')
fh.write("%s\n" % snpeff_version if snpeff_version else 'SnpEff version unknown')
if annotations:
fh.write("annotations: %s\n" % ','.join(annotations))
if regulations:
@@ -422,8 +445,8 @@ class SnpSiftDbNSFP( Text ):
"""
def __init__( self, **kwd ):
Text.__init__( self, **kwd )
self.add_composite_file( '%s.grp', description='Group File', substitute_name_with_metadata='reference_name', is_binary=False )
self.add_composite_file( '%s.ti', description='', substitute_name_with_metadata='reference_name', is_binary=False )
self.add_composite_file( '%s.gz', description='dbNSFP bgzip', substitute_name_with_metadata='reference_name', is_binary=True )
self.add_composite_file( '%s.gz.tbi', description='Tabix Index File', substitute_name_with_metadata='reference_name', is_binary=True )

def init_meta( self, dataset, copy_from=None ):
Text.init_meta( self, dataset, copy_from=copy_from )
@@ -471,3 +494,11 @@ def set_meta( self, dataset, overwrite=True, **kwd ):
self.regenerate_primary_file(dataset)
except Exception as e:
            log.warn("set_meta fname: %s %s" % (dataset.file_name if dataset and dataset.file_name else 'Unknown', str(e)))

def set_peek( self, dataset, is_multi_byte=False ):
if not dataset.dataset.purged:
dataset.peek = '%s : %s' % (dataset.metadata.reference_name, ','.join(dataset.metadata.annotation))
dataset.blurb = '%s' % dataset.metadata.reference_name
else:
dataset.peek = 'file does not exist'
dataset.blurb = 'file purged from disc'
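
The version sniffing above relies on SnpEff >= 4.1 writing its name and version as the first line of the gzip-compressed snpEffectPredictor.bin. A minimal standalone check of that logic - the path and the printed value are illustrative, not from this commit - looks like:

    import gzip
    import re

    # hypothetical path to a downloaded SnpEff genome build file
    path = 'snpEffectPredictor.bin'
    with gzip.open(path, 'rb') as fh:
        header = fh.read(100)
    # post-4.1 builds begin with a line like: 'SnpEff  4.1  ...'
    # decode so the regex works on text under both py2 and py3
    first_line = header.splitlines()[0].strip().decode('utf-8', 'ignore')
    m = re.match(r'^(SnpEff)\s+(\d+\.\d+)', first_line)
    version = m.group(1) + m.group(2) if m else None
    print(version)  # e.g. 'SnpEff4.1'

Pre-4.1 builds have no such header line, so the sniffer returns None, dataset.metadata.snpeff_version keeps its 'SnpEff4.0' default, and the primary file records 'SnpEff version unknown'.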
58 changes: 52 additions & 6 deletions lib/galaxy/managers/base.py
@@ -24,6 +24,9 @@
# such as: a single flat class, serializers being singletons in the manager, etc.
# instead of the three separate classes. With no 'apparent' perfect scheme
# I'm opting to just keep them separate.
import datetime
import re

import sqlalchemy
import routes

@@ -592,7 +595,9 @@ def add_serializers( self ):
the attribute.
"""
self.serializers.update({
'id' : self.serialize_id,
'id' : self.serialize_id,
'create_time' : self.serialize_date,
'update_time' : self.serialize_date,
})

def add_view( self, view_name, key_list, include_keys_from=None ):
@@ -669,6 +674,18 @@ def serialize_id( self, item, key, **context ):
# Note: it may not be best to encode the id at this layer
return self.app.security.encode_id( id ) if id is not None else None

def serialize_type_id( self, item, key, **context ):
"""
        Serialize a type-id for `item`.
"""
TYPE_ID_SEP = '-'
type_id = getattr( item, key )
if type_id is None:
return None
split = type_id.split( TYPE_ID_SEP, 1 )
# Note: it may not be best to encode the id at this layer
return TYPE_ID_SEP.join([ split[0], self.app.security.encode_id( split[1] )])

    # serializing to a view where a view is a predefined list of keys to serialize
def serialize_to_view( self, item, view=None, keys=None, default_view=None, **context ):
"""
@@ -934,8 +951,8 @@ def _add_parsers( self ):
'id' : { 'op': ( 'in' ) },
'encoded_id' : { 'column' : 'id', 'op': ( 'in' ), 'val': self.parse_id_list },
# dates can be directly passed through the orm into a filter (no need to parse into datetime object)
'create_time' : { 'op': ( 'le', 'ge' ) },
'update_time' : { 'op': ( 'le', 'ge' ) },
'create_time' : { 'op': ( 'le', 'ge' ), 'val': self.parse_date },
'update_time' : { 'op': ( 'le', 'ge' ), 'val': self.parse_date },
})

def parse_filters( self, filter_tuple_list ):
@@ -1016,10 +1033,12 @@ def _parse_orm_filter( self, attr, op, val ):
return None
# attr must be a whitelisted column by attr name or by key passed in column_map
# note: column_map[ 'column' ] takes precedence
column = self.model_class.table.columns.get( attr )
if 'column' in column_map:
remap_to = column_map[ 'column' ]
column = self.model_class.table.columns.get( remap_to )
attr = column_map[ 'column' ]
column = self.model_class.table.columns.get( attr )
if column is None:
# could be a property (hybrid_property, etc.) - assume we can make a filter from it
column = getattr( self.model_class, attr )
if column is None:
# no orm column
return None
@@ -1091,3 +1110,30 @@ def parse_id_list( self, id_list_string, sep=',' ):
# TODO: move id decoding out
id_list = [ self.app.security.decode_id( id_ ) for id_ in id_list_string.split( sep ) ]
return id_list

def parse_int_list( self, int_list_string, sep=',' ):
"""
Split `int_list_string` at `sep` and parse as ints.
"""
int_list = [ int( v ) for v in int_list_string.split( sep ) ]
return int_list

def parse_date( self, date_string ):
"""
        Attempt to coerce `date_string` into a date string usable in an SQL query filter.
"""
# Attempts to parse epoch int back into date string
try:
epoch = int( date_string )
date = datetime.datetime.fromtimestamp( epoch )
return date.isoformat().replace( 'T', ' ', 1 )
except ValueError:
pass
# or removes T from date string
if not hasattr( self, 'date_string_re' ):
self.date_string_re = re.compile( r'^\d{4}\-\d{2}\-\d{2}T' )
if self.date_string_re.match( date_string ):
return date_string.replace( 'T', ' ', 1 )
# or as is
return date_string
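
With 'val': self.parse_date wired into the create_time/update_time filter parsers above, API clients can filter on either an epoch integer or an ISO 8601 string. The conversion is easy to verify standalone (a restatement of parse_date above, minus self):

    import datetime
    import re

    DATE_STRING_RE = re.compile(r'^\d{4}-\d{2}-\d{2}T')

    def parse_date(date_string):
        # epoch seconds -> 'YYYY-MM-DD HH:MM:SS' (local time)
        try:
            epoch = int(date_string)
            return datetime.datetime.fromtimestamp(epoch).isoformat().replace('T', ' ', 1)
        except ValueError:
            pass
        # ISO 8601 with a 'T' separator -> space separator
        if DATE_STRING_RE.match(date_string):
            return date_string.replace('T', ' ', 1)
        # anything else passes through unchanged
        return date_string

    print(parse_date('2016-02-17T09:30:00'))  # -> '2016-02-17 09:30:00'
    print(parse_date('2016-02-17'))           # -> '2016-02-17' (unchanged)

The space-separated form lines up with how the backing database renders DateTime values for comparison, which is presumably why the 'T' is stripped before the string is handed to the ORM filter.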
3 changes: 3 additions & 0 deletions lib/galaxy/managers/datasets.py
@@ -409,6 +409,9 @@ def add_serializers( self ):
self.serializable_keyset.update([ 'name', 'state', 'tool_version', 'extension', 'visible', 'dbkey' ])

def _proxy_to_dataset( self, serializer=None, key=None ):
        # Dataset associations are (rough) proxies to datasets - access the dataset serializer through this remapping fn.
        # The remapping is done either by kwarg key (a dataset attribute name, e.g. uuid)
        # or by kwarg serializer (a serializing function passed in directly, e.g. permissions).
if key:
serializer = self.dataset_serializer.serializers.get( key )
if serializer:
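
The truncated body presumably wraps the resolved serializer so that it is applied to the association's underlying .dataset rather than to the association itself. A hypothetical sketch of that shape - not the committed code, which is cut off here:

    def _proxy_to_dataset(self, serializer=None, key=None):
        # resolve a dataset attribute key to the dataset serializer's function
        if key:
            serializer = self.dataset_serializer.serializers.get(key)
        if serializer:
            # hypothetical wrapper: serialize item.dataset in place of item
            return lambda item, key, **context: serializer(item.dataset, key, **context)
        raise TypeError('must pass either a serializer or a key')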
3 changes: 0 additions & 3 deletions lib/galaxy/managers/hdas.py
@@ -350,9 +350,6 @@ def add_serializers( self ):
'type' : lambda *a, **c: 'file'
})

def serialize_type_id( self, hda, key, **context ):
return 'dataset-' + self.serializers[ 'id' ]( hda, 'id' )

def serialize_display_apps( self, hda, key, trans=None, **context ):
"""
Return dictionary containing new-style display app urls.
