Skip to content

Commit

Permalink
cram datatype: support for crai indexes via pysam
Browse files Browse the repository at this point in the history
  • Loading branch information
yhoogstrate committed Nov 27, 2015
1 parent f4dab55 commit 2be9aa2
Showing 1 changed file with 34 additions and 4 deletions.
38 changes: 34 additions & 4 deletions lib/galaxy/datatypes/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,14 +461,44 @@ class CRAM( Binary ):
# EDAM ontology identifier for this datatype (presumably the CRAM format entry - confirm against EDAM).
edam_format = "format_3462"

# Read-only, hidden, non-optional metadata element named "cram_version"; defaults to None.
MetadataElement( name="cram_version", default=None, desc="CRAM Version", param=MetadataParameter, readonly=True, visible=False, optional=False, no_value=None )
# Read-only, hidden, optional metadata file element named "cram_index", declared with the "crai" extension.
MetadataElement( name="cram_index", desc="CRAM Index File", param=metadata.FileParameter, file_ext="crai", readonly=True, no_value=None, visible=False, optional=True )

def set_meta( self, dataset, overwrite=True, **kwd ):
    """
    Fill in the CRAM-specific metadata of ``dataset``.

    Sets ``cram_version`` from the file header and, when no index has been
    recorded yet, asks ``get_index_file`` to build one and stores it in
    ``cram_index``.

    :param dataset: dataset whose ``file_name`` and ``metadata`` are used
    :param overwrite: accepted for interface compatibility; not used here
    :param kwd: extra keyword arguments; not used here
    """
    major_version, minor_version = self.get_cram_version( dataset.file_name )
    # get_cram_version signals failure with (-1, -1); only record real versions.
    if major_version > 0:
        dataset.metadata.cram_version = str(major_version) + "." + str(minor_version)

    # The metadata element declared for this datatype is "cram_index"
    # (not "bam_index", which belongs to the BAM datatype).
    if not dataset.metadata.cram_index:
        index_file = dataset.metadata.spec['cram_index'].param.new_file( dataset=dataset )
        # Only record the index when building it reported success; a falsy
        # result leaves the metadata untouched so a later run can retry.
        if self.get_index_file( dataset, index_file ):
            dataset.metadata.cram_index = index_file

def get_cram_version( self, filename ):
    """
    Read the version bytes from the header of the file at ``filename``.

    The first six bytes are read and the values of byte 4 and byte 5 are
    returned as the major and minor version.

    :param filename: path of the file to inspect
    :returns: ``(major, minor)`` as ints, or ``(-1, -1)`` when the header
              could not be read (the error is logged, not raised)
    """
    try:
        # Binary mode: the version is stored as raw byte values, not text.
        with open( filename, "rb" ) as fh:
            # Indexing a bytearray yields ints on both Python 2 and Python 3.
            header = bytearray( fh.read( 6 ) )
        # A file shorter than six bytes raises IndexError here and is
        # reported through the failure path below.
        return header[4], header[5]
    except Exception as exc:
        # Best effort: metadata detection must not abort on an unreadable file.
        log.warning( '%s, get_cram_version Exception: %s', self, exc )
        return -1, -1

def get_index_file( self, dataset, index_file ):
    """
    Build an index for the CRAM file of ``dataset`` and move it to
    ``index_file.file_name``.

    :param dataset: dataset providing ``file_name`` (the CRAM file) and
                    ``id`` (used to name the temporary symlink)
    :param index_file: metadata file object; its ``file_name`` is the
                       destination of the generated index
    :returns: the path of the index file on success, ``False`` otherwise
              (errors are logged, not raised)
    """
    # Local import so this method does not depend on the module's import block.
    import shutil

    dataset_symlink = None
    try:
        # @todo when pysam 1.2.1 or pysam 1.3.0 gets released use pysam.index(alignment, target_idx)
        # This currently gives coredump in the current release but is fixed in the dev branch
        # xref: https://github.com/samtools/samtools/issues/199

        # Index through a symlink placed next to the destination, so the
        # index is written beside the symlink rather than beside the dataset.
        dataset_symlink = os.path.join( os.path.dirname( index_file.file_name ), '__dataset_%d_%s' % ( dataset.id, os.path.basename( index_file.file_name ) ) )
        os.symlink( dataset.file_name, dataset_symlink )
        pysam.index( dataset_symlink )

        # NOTE(review): samtools is expected to name a CRAM index
        # "<input>.crai" - confirm against the installed pysam/samtools.
        tmp_index = dataset_symlink + ".crai"
        if os.path.isfile( tmp_index ):
            shutil.move( tmp_index, index_file.file_name )
            return index_file.file_name

        log.warning( '%s, expected crai index not created for: %s', self, dataset.file_name )
        return False
    except Exception as exc:
        # Can not read or index the CRAM file for some reason
        log.warning( '%s, get_index_file Exception: %s', self, exc )
        return False
    finally:
        # The symlink is only a temporary handle for the indexer.
        if dataset_symlink is not None and os.path.islink( dataset_symlink ):
            os.unlink( dataset_symlink )

def set_peek( self, dataset, is_multi_byte=False ):
if not dataset.dataset.purged:
Expand Down

0 comments on commit 2be9aa2

Please sign in to comment.