Skip to content

Commit

Permalink
Merge pull request #721 from dib-lab/add-oxli-datatypes
Browse files Browse the repository at this point in the history
add Oxli datatypes
  • Loading branch information
bgruening committed Sep 18, 2015
2 parents 304e569 + 02df796 commit ce9fe44
Show file tree
Hide file tree
Showing 8 changed files with 172 additions and 0 deletions.
12 changes: 12 additions & 0 deletions config/datatypes_conf.xml.sample
Expand Up @@ -412,6 +412,12 @@
<datatype extension="xmfa" type="galaxy.datatypes.msa:MauveXmfa" display_in_upload="True" />

<datatype extension="RData" type="galaxy.datatypes.binary:RData" display_in_upload="true" description="Stored data from an R session"/>
<datatype extension="oxlicg" type="galaxy.datatypes.binary:OxliCountGraph" mimetype="application/octet-stream" display_in_upload="true"/>
<datatype extension="oxling" type="galaxy.datatypes.binary:OxliNodeGraph" mimetype="application/octet-stream" display_in_upload="true"/>
<datatype extension="oxlits" type="galaxy.datatypes.binary:OxliTagSet" mimetype="application/octet-stream" display_in_upload="true"/>
<datatype extension="oxlist" type="galaxy.datatypes.binary:OxliStopTags" mimetype="application/octet-stream" display_in_upload="true"/>
<datatype extension="oxliss" type="galaxy.datatypes.binary:OxliSubset" mimetype="application/octet-stream" display_in_upload="true"/>
<datatype extension="oxligl" type="galaxy.datatypes.binary:OxliGraphLabels" mimetype="application/octet-stream" display_in_upload="true"/>
</registration>
<sniffers>
<!--
Expand Down Expand Up @@ -500,6 +506,12 @@
module is fixed to not read the entire file.
<sniffer type="galaxy.datatypes.assembly:Amos"/>
-->
<sniffer type="galaxy.datatypes.binary:OxliCountGraph"/>
<sniffer type="galaxy.datatypes.binary:OxliNodeGraph"/>
<sniffer type="galaxy.datatypes.binary:OxliTagSet"/>
<sniffer type="galaxy.datatypes.binary:OxliStopTags"/>
<sniffer type="galaxy.datatypes.binary:OxliSubset"/>
<sniffer type="galaxy.datatypes.binary:OxliGraphLabels"/>
</sniffers>
<build_sites>
<!--
Expand Down
160 changes: 160 additions & 0 deletions lib/galaxy/datatypes/binary.py
Expand Up @@ -893,3 +893,163 @@ def sniff( self, filename ):
return False

# Register the RData class with Binary's sniffable-format registry.
# NOTE(review): the arguments look like (format name, file extension, class);
# confirm against Binary.register_sniffable_binary_format, which is not
# visible in this excerpt.
Binary.register_sniffable_binary_format('RData', 'RData', RData)


class OxliBinary(Binary):
    """
    Common base for the Oxli binary datatypes.

    Every Oxli file handled here begins with the four byte magic string
    "OXLI", followed by a one byte version number and a one byte file type
    code.  Subclasses differ only in the type code they pass to `_sniff`.
    """

    @staticmethod
    def _sniff(filename, oxlitype):
        """
        Check whether `filename` is an Oxli file of the requested type.

        :param filename: path of the file to inspect.
        :param oxlitype: expected file type byte, given as a two character
                         lowercase hexadecimal string (e.g. "01").
        :returns: True when the file starts with "OXLI" and its sixth byte
                  matches `oxlitype`; False otherwise, including when the
                  file is too short or cannot be opened or read.
        """
        try:
            # The header is raw bytes, so the file must be opened in binary
            # mode: text mode may translate newlines on some platforms and,
            # on Python 3, decodes the data (which can fail outright).
            with open(filename, "rb") as fileobj:
                header = fileobj.read(4)
                if header != b"OXLI":
                    return False
                fileobj.read(1)  # skip the version number
                ftype = fileobj.read(1)
        except IOError:
            return False
        # hexlify() yields bytes on Python 3; decode so the comparison with
        # the textual type code works on both Python 2 and Python 3.  A
        # truncated file gives an empty `ftype`, which never matches.
        return binascii.hexlify(ftype).decode("ascii") == oxlitype


class OxliCountGraph(OxliBinary):
    """
    Oxli count graph datatype.

    The file header is the magic string "OXLI", then a one byte version
    number, then a single byte holding the value 1.

    The test file was produced with khmer 2.0 by running (one command,
    wrapped here for readability)::

        load-into-counting.py --n_tables 1 --max-tablesize 1
            oxli_countgraph.oxlicg khmer/tests/test-data/100-reads.fq.bz2

    >>> from galaxy.datatypes.sniff import get_test_fname
    >>> fname = get_test_fname( 'sequence.csfasta' )
    >>> OxliCountGraph().sniff( fname )
    False
    >>> fname = get_test_fname( "oxli_countgraph.oxlicg" )
    >>> OxliCountGraph().sniff( fname )
    True
    """

    def sniff(self, filename):
        """Report whether `filename` carries the count graph type byte."""
        type_code = "01"
        return OxliBinary._sniff(filename, type_code)


Binary.register_sniffable_binary_format(
    "oxli.countgraph", "oxlicg", OxliCountGraph
)


class OxliNodeGraph(OxliBinary):
    """
    Oxli node graph datatype.

    The file header is the magic string "OXLI", then a one byte version
    number, then a single byte holding the value 2.

    The test file was produced with khmer 2.0 by running (one command,
    wrapped here for readability)::

        load-graph.py --n_tables 1 --max-tablesize 1
            oxli_nodegraph.oxling khmer/tests/test-data/100-reads.fq.bz2

    >>> from galaxy.datatypes.sniff import get_test_fname
    >>> fname = get_test_fname( 'sequence.csfasta' )
    >>> OxliNodeGraph().sniff( fname )
    False
    >>> fname = get_test_fname( "oxli_nodegraph.oxling" )
    >>> OxliNodeGraph().sniff( fname )
    True
    """

    def sniff(self, filename):
        """Report whether `filename` carries the node graph type byte."""
        type_code = "02"
        return OxliBinary._sniff(filename, type_code)


Binary.register_sniffable_binary_format(
    "oxli.nodegraph", "oxling", OxliNodeGraph
)


class OxliTagSet(OxliBinary):
    """
    Oxli tag set datatype.

    The file header is the magic string "OXLI", then a one byte version
    number, then a single byte holding the value 3.

    The test file was produced with khmer 2.0 by running (wrapped here for
    readability)::

        load-graph.py --n_tables 1 --max-tablesize 1
            oxli_nodegraph.oxling khmer/tests/test-data/100-reads.fq.bz2;
        mv oxli_nodegraph.oxling.tagset oxli_tagset.oxlits

    >>> from galaxy.datatypes.sniff import get_test_fname
    >>> fname = get_test_fname( 'sequence.csfasta' )
    >>> OxliTagSet().sniff( fname )
    False
    >>> fname = get_test_fname( "oxli_tagset.oxlits" )
    >>> OxliTagSet().sniff( fname )
    True
    """

    def sniff(self, filename):
        """Report whether `filename` carries the tag set type byte."""
        type_code = "03"
        return OxliBinary._sniff(filename, type_code)


Binary.register_sniffable_binary_format(
    "oxli.tagset", "oxlits", OxliTagSet
)


class OxliStopTags(OxliBinary):
    """
    Oxli stop tags datatype.

    The file header is the magic string "OXLI", then a one byte version
    number, then a single byte holding the value 4.

    The test file was adapted from khmer 2.0's
    "khmer/tests/test-data/goodversion-k32.stoptags".

    >>> from galaxy.datatypes.sniff import get_test_fname
    >>> fname = get_test_fname( 'sequence.csfasta' )
    >>> OxliStopTags().sniff( fname )
    False
    >>> fname = get_test_fname( "oxli_stoptags.oxlist" )
    >>> OxliStopTags().sniff( fname )
    True
    """

    def sniff(self, filename):
        """Report whether `filename` carries the stop tags type byte."""
        type_code = "04"
        return OxliBinary._sniff(filename, type_code)


Binary.register_sniffable_binary_format(
    "oxli.stoptags", "oxlist", OxliStopTags
)


class OxliSubset(OxliBinary):
    """
    Oxli subset datatype.

    The file header is the magic string "OXLI", then a one byte version
    number, then a single byte holding the value 5.

    The test file was produced with khmer 2.0 by running::

        load-graph.py -k 20 example tests/test-data/random-20-a.fa;
        partition-graph.py example;
        mv example.subset.0.pmap oxli_subset.oxliss

    >>> from galaxy.datatypes.sniff import get_test_fname
    >>> fname = get_test_fname( 'sequence.csfasta' )
    >>> OxliSubset().sniff( fname )
    False
    >>> fname = get_test_fname( "oxli_subset.oxliss" )
    >>> OxliSubset().sniff( fname )
    True
    """

    def sniff(self, filename):
        """Report whether `filename` carries the subset type byte."""
        type_code = "05"
        return OxliBinary._sniff(filename, type_code)


Binary.register_sniffable_binary_format(
    "oxli.subset", "oxliss", OxliSubset
)


class OxliGraphLabels(OxliBinary):
    """
    Oxli graph labels datatype.

    The file header is the magic string "OXLI", then a one byte version
    number, then a single byte holding the value 6.

    The test file was produced with khmer 2.0 by running (one command,
    wrapped here for readability)::

        python -c "from khmer import GraphLabels;
            gl = GraphLabels(20, 1e7, 4);
            gl.consume_fasta_and_tag_with_labels(
                'tests/test-data/test-labels.fa');
            gl.save_labels_and_tags('oxli_graphlabels.oxligl')"

    >>> from galaxy.datatypes.sniff import get_test_fname
    >>> fname = get_test_fname( 'sequence.csfasta' )
    >>> OxliGraphLabels().sniff( fname )
    False
    >>> fname = get_test_fname( "oxli_graphlabels.oxligl" )
    >>> OxliGraphLabels().sniff( fname )
    True
    """

    def sniff(self, filename):
        """Report whether `filename` carries the graph labels type byte."""
        type_code = "06"
        return OxliBinary._sniff(filename, type_code)


Binary.register_sniffable_binary_format(
    "oxli.graphlabels", "oxligl", OxliGraphLabels
)
Binary file added lib/galaxy/datatypes/test/oxli_countgraph.oxlicg
Binary file not shown.
Binary file added lib/galaxy/datatypes/test/oxli_graphlabels.oxligl
Binary file not shown.
Binary file added lib/galaxy/datatypes/test/oxli_nodegraph.oxling
Binary file not shown.
Binary file added lib/galaxy/datatypes/test/oxli_stoptags.oxlist
Binary file not shown.
Binary file added lib/galaxy/datatypes/test/oxli_subset.oxliss
Binary file not shown.
Binary file added lib/galaxy/datatypes/test/oxli_tagset.oxlits
Binary file not shown.

0 comments on commit ce9fe44

Please sign in to comment.