Skip to content

Commit

Permalink
Merge pull request #5023 from bgruening/cooler
Browse files Browse the repository at this point in the history
add cool datatype
  • Loading branch information
jmchilton committed Nov 20, 2017
2 parents db2c3a4 + 1718f8d commit 3e2e149
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 1 deletion.
2 changes: 2 additions & 0 deletions config/datatypes_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@
<datatype extension="toolshed.gz" type="galaxy.datatypes.binary:Binary" mimetype="multipart/x-gzip" subclass="true" />
<datatype extension="h5" type="galaxy.datatypes.binary:H5" mimetype="application/octet-stream" display_in_upload="true"/>
<datatype extension="mz5" type="galaxy.datatypes.binary:H5" subclass="true" mimetype="application/octet-stream" display_in_upload="true"/>
<datatype extension="cool" type="galaxy.datatypes.binary:Cool" subclass="true" mimetype="application/octet-stream" display_in_upload="true"/>
<datatype extension="html" type="galaxy.datatypes.text:Html" mimetype="text/html"/>
<datatype extension="interval" type="galaxy.datatypes.interval:Interval" display_in_upload="true" description="File must start with definition line in the following format (columns may be in any order)." >
<converter file="interval_to_bed_converter.xml" target_datatype="bed"/>
Expand Down Expand Up @@ -677,6 +678,7 @@
<sniffer type="galaxy.datatypes.binary:MzSQlite"/>
<sniffer type="galaxy.datatypes.binary:IdpDB"/>
<sniffer type="galaxy.datatypes.binary:SQlite"/>
<sniffer type="galaxy.datatypes.binary:Cool"/>
<sniffer type="galaxy.datatypes.binary:Biom2"/>
<sniffer type="galaxy.datatypes.binary:H5"/>
<sniffer type="galaxy.datatypes.binary:Bam"/>
Expand Down
55 changes: 54 additions & 1 deletion lib/galaxy/datatypes/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from galaxy.util.checkers import is_bz2, is_gzip
from . import data, dataproviders


log = logging.getLogger(__name__)

# Currently these supported binary data types must be manually set on upload
Expand Down Expand Up @@ -890,6 +889,60 @@ def display_peek(self, dataset):
return "Biom2 (HDF5) file (%s)" % (nice_size(dataset.get_size()))


class Cool(H5):
    """
    Class describing the cool format (https://github.com/mirnylab/cooler)

    A dataset is recognised as cool when it passes the parent ``H5`` sniffer,
    carries the cooler ``format`` or ``format-url`` root attribute, and
    contains the ``chroms``, ``bins``, ``pixels`` and ``indexes`` members.
    """

    file_ext = "cool"

    def sniff(self, filename):
        """
        Return True if ``filename`` looks like a cool file, False otherwise.

        >>> from galaxy.datatypes.sniff import get_test_fname
        >>> fname = get_test_fname( 'matrix.cool' )
        >>> Cool().sniff( fname )
        True
        >>> fname = get_test_fname( 'test.mz5' )
        >>> Cool().sniff( fname )
        False
        >>> fname = get_test_fname( 'wiggle.wig' )
        >>> Cool().sniff( fname )
        False
        >>> fname = get_test_fname( 'biom2_sparse_otu_table_hdf5.biom' )
        >>> Cool().sniff( fname )
        False
        """

        MAGIC = "HDF5::Cooler"
        URL = "https://github.com/mirnylab/cooler"
        required_members = ('chroms', 'bins', 'pixels', 'indexes')

        def as_text(value):
            # Depending on the h5py version and on how the attribute was
            # written, string attributes may be returned as bytes
            # (numpy.bytes_) rather than text. Bytes never compare equal to
            # the text constants above on Python 3, so normalise first.
            if isinstance(value, bytes):
                return value.decode('utf-8', 'replace')
            return value

        # Let the parent sniffer reject anything it does not accept as HDF5
        # before h5py tries to open the file.
        if not super(Cool, self).sniff(filename):
            return False

        with h5py.File(filename, 'r') as handle:
            fmt = as_text(handle.attrs.get('format', None))
            url = as_text(handle.attrs.get('format-url', None))
            # Either identifying attribute is sufficient.
            if fmt != MAGIC and url != URL:
                return False
            # Every required top-level member must be present.
            return all(name in handle for name in required_members)

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Set a fixed description as the peek and the file size as the blurb.

        ``is_multi_byte`` is accepted but not used by this method. When
        ``dataset.dataset.purged`` is true, placeholder texts are stored
        instead.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Cool (HDF5) file for storing genomic interaction data."
            dataset.blurb = nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """
        Return the stored peek, or a generic size description if reading
        ``dataset.peek`` raises.
        """
        try:
            return dataset.peek
        except Exception:
            # Best-effort fallback: any failure reading the peek yields a
            # generic description instead of propagating.
            return "Cool (HDF5) file (%s)." % (nice_size(dataset.get_size()))


# Register the HDF5-based datatypes as sniffable binary formats.
# NOTE(review): Cool subclasses H5, so its sniffer is stricter than H5's;
# presumably registration order controls the order sniffers are tried, which
# is why the specific formats come before the generic "h5" entry -- confirm
# against Binary.register_sniffable_binary_format before reordering.
Binary.register_sniffable_binary_format("cool", "cool", Cool)
Binary.register_sniffable_binary_format("biom2", "biom2", Biom2)
Binary.register_sniffable_binary_format("h5", "h5", H5)

Expand Down
Binary file added lib/galaxy/datatypes/test/matrix.cool
Binary file not shown.

0 comments on commit 3e2e149

Please sign in to comment.