Skip to content

Commit

Permalink
Preview feature for Bam datatype
Browse files Browse the repository at this point in the history
  • Loading branch information
ashvark committed Jul 6, 2017
1 parent d168115 commit 8c8aa88
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 2 deletions.
54 changes: 53 additions & 1 deletion lib/galaxy/datatypes/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from bx.seq.twobit import TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP, TWOBIT_MAGIC_SIZE

from galaxy.datatypes import metadata
from galaxy.datatypes.tabular import Sam
from galaxy.datatypes.metadata import DictParameter, ListParameter, MetadataElement, MetadataParameter
from galaxy.util import FILENAME_VALID_CHARS, nice_size, sqlite, which
from . import data, dataproviders
Expand Down Expand Up @@ -220,7 +221,7 @@ class GenericAsn1Binary( Binary ):


@dataproviders.decorators.has_dataproviders
class Bam( Binary ):
class Bam( Binary , Sam):
"""Class describing a BAM binary file"""
edam_format = "format_2572"
edam_data = "data_0863"
Expand All @@ -236,6 +237,10 @@ class Bam( Binary ):
MetadataElement( name="reference_lengths", default=[], desc="Chromosome Lengths", param=MetadataParameter, readonly=True, visible=False, optional=True, no_value=[] )
MetadataElement( name="bam_header", default={}, desc="Dictionary of BAM Headers", param=MetadataParameter, readonly=True, visible=False, optional=True, no_value={} )

def __init__(self, **kwd):
    """Initialize the BAM datatype.

    All keyword arguments are forwarded unchanged to the next
    ``__init__`` in the class's method resolution order.
    """
    super( Bam, self ).__init__( **kwd )

def _get_samtools_version( self ):
version = '0.0.0'
samtools_exec = which('samtools')
Expand Down Expand Up @@ -462,6 +467,53 @@ def to_archive(self, trans, dataset, name=""):
file_paths.append(dataset.metadata.bam_index.file_name)
return zip(file_paths, rel_paths)


def get_chunk(self, trans, dataset, offset=0, ck_size=None):
    """Return one page of alignment records as a JSON string.

    The records are rendered as tab-separated text so the chunked
    tabular viewer can display them.

    :param trans: transaction object (not used here; kept for interface
        compatibility with other ``get_chunk`` implementations).
    :param dataset: dataset whose ``file_name`` points at the BAM file.
    :param offset: number of alignment records to skip before the chunk
        starts.  When it is 0 the BAM header text is emitted first.
    :param ck_size: maximum number of records to return; ``None`` or a
        non-positive / unparsable value selects the default of 100.
    :returns: JSON object with ``ck_data`` (the text chunk) and
        ``offset`` (``offset + ck_size``, i.e. the value to pass as
        ``offset`` for the following chunk).
    """
    default_ck_size = 100  # records per chunk

    # Compare numerically even if the value arrives as a string.
    # NOTE(review): presumably offsets can come straight from request
    # parameters; confirm against the caller.
    offset = int(offset)
    try:
        ck_size = int(ck_size) if ck_size is not None else default_ck_size
    except (TypeError, ValueError):
        ck_size = default_ck_size
    if ck_size <= 0:
        # A non-positive size would never advance the returned offset.
        ck_size = default_ck_size
    last_read = offset + ck_size

    pieces = []
    bamfile = pysam.AlignmentFile(dataset.file_name, "rb")
    try:
        if offset == 0:
            # The first chunk starts with the header text.
            pieces.append(bamfile.text)
        for index, read in enumerate(bamfile.fetch(until_eof=True)):
            if index >= last_read:
                break
            if index < offset:
                continue
            fields = read.tostring(bamfile).split()
            # 'tostring()' separates the optional tags with whitespace, so
            # Galaxy would display each tag as its own column.  Keep the
            # 11 leading fields and collapse all tags into one column.
            tags = ''.join(fields[11:])
            pieces.append("\n" + '\t'.join(fields[:11] + [tags]))
    finally:
        # Always release the file handle, even if reading fails.
        bamfile.close()

    return dumps( { 'ck_data': util.unicodify( ''.join(pieces) ),
                    'offset': last_read } )

def display_data( self, trans, dataset, preview=False, filename=None, to_ext=None, offset=None, ck_size=None, **kwd):
    """Serve the dataset as a chunk, via the parent class, or as a preview page.

    * If ``offset`` is given, return the matching chunk from
      ``get_chunk``.
    * If ``to_ext`` is set or ``preview`` is false, delegate to the
      parent class's ``display_data``.
    * Otherwise render the ``/dataset/tabular_chunked.mako`` template,
      seeded with the first chunk and the dataset's column metadata.
    """
    preview = util.string_as_bool( preview )

    # Paging request: hand back just the requested chunk.
    if offset is not None:
        return self.get_chunk(trans, dataset, offset, ck_size)

    # Anything that is not a plain preview goes to the parent class.
    if to_ext or not preview:
        return super( Bam, self ).display_data( trans, dataset, preview, filename, to_ext, **kwd )

    meta = dataset.metadata

    # Column names: metadata first, then the datatype's own, else 'null'.
    if meta.column_names:
        names = meta.column_names
    elif hasattr(dataset.datatype, 'column_names'):
        names = dataset.datatype.column_names
    else:
        names = 'null'

    # Missing column types become an empty list.
    types = meta.column_types or []

    # A missing column count becomes the string 'null'.
    count = meta.columns
    if count is None:
        count = 'null'

    first_chunk = self.get_chunk(trans, dataset, 0)
    return trans.fill_template( "/dataset/tabular_chunked.mako",
                                dataset=dataset,
                                chunk=first_chunk,
                                column_number=count,
                                column_names=names,
                                column_types=types )

# ------------- Dataproviders
# pipe through samtools view
# ALSO: (as Sam)
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/datatypes/sniff.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ def handle_uploaded_dataset_file( filename, datatypes_registry, ext='auto', is_m
ext = guess_ext( filename, sniff_order=datatypes_registry.sniff_order, is_multi_byte=is_multi_byte )

if check_binary( filename ):
if not Binary.is_ext_unsniffable(ext) and not datatypes_registry.get_datatype_by_extension( ext ).sniff( filename ):
if not galaxy.datatypes.binary.is_ext_unsniffable(ext) and not datatypes_registry.get_datatype_by_extension( ext ).sniff( filename ):
raise InappropriateDatasetContentError( 'The binary uploaded file contains inappropriate content.' )
elif check_html( filename ):
raise InappropriateDatasetContentError( 'The uploaded file contains inappropriate HTML content.' )
Expand Down

0 comments on commit 8c8aa88

Please sign in to comment.