Skip to content

Commit

Permalink
add augustus and snap datatypes
Browse files Browse the repository at this point in the history
  • Loading branch information
abretaud committed Oct 16, 2017
1 parent ebdadb6 commit 86929ff
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 0 deletions.
4 changes: 4 additions & 0 deletions config/datatypes_conf.xml.sample
Expand Up @@ -488,6 +488,9 @@
<datatype extension="score" type="galaxy.datatypes.data:Text" subclass="true"/>
<datatype extension="srs" type="galaxy.datatypes.data:Text" subclass="true"/>
<datatype extension="srspair" type="galaxy.datatypes.data:Text" subclass="true"/>
<!-- Annotation Datatypes -->
<datatype extension="snaphmm" type="galaxy.datatypes.annotation:SnapHmm" display_in_upload="true" />
<datatype extension="augustus" type="galaxy.datatypes.annotation:Augustus" display_in_upload="true" />
<!-- MSA Datatypes -->
<datatype extension="hmm2" type="galaxy.datatypes.msa:Hmmer2" display_in_upload="true" />
<datatype extension="hmm3" type="galaxy.datatypes.msa:Hmmer3" display_in_upload="true" />
Expand Down Expand Up @@ -679,6 +682,7 @@
<sniffer type="galaxy.datatypes.binary:Fast5ArchiveGz" />
<sniffer type="galaxy.datatypes.binary:Fast5ArchiveBz2" />
<sniffer type="galaxy.datatypes.binary:Fast5Archive" />
<sniffer type="galaxy.datatypes.annotation:Augustus" />
<sniffer type="galaxy.datatypes.triples:Rdf"/>
<sniffer type="galaxy.datatypes.blast:BlastXml"/>
<sniffer type="galaxy.datatypes.xml:Phyloxml"/>
Expand Down
85 changes: 85 additions & 0 deletions lib/galaxy/datatypes/annotation.py
@@ -0,0 +1,85 @@
import logging
import tarfile

from galaxy.datatypes.binary import Binary, CompressedArchive
from galaxy.datatypes.data import get_file_peek, Text
from galaxy.util import nice_size

log = logging.getLogger(__name__)


class SnapHmm(Text):
    """
    Class describing a SNAP HMM model file (text file starting with ``zoeHMM``).
    """
    file_ext = "snaphmm"

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Fill in ``dataset.peek`` and ``dataset.blurb``.

        For a non-purged dataset the peek comes from ``get_file_peek`` on the
        dataset file; for a purged dataset both fields get placeholder text.
        """
        if not dataset.dataset.purged:
            dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            dataset.blurb = "SNAP HMM model"
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disc'

    def display_peek(self, dataset):
        """
        Return the stored peek, falling back to a generic size description
        if reading ``dataset.peek`` fails.
        """
        try:
            return dataset.peek
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt and
            # SystemExit are allowed to propagate.
            return "SNAP HMM model (%s)" % (nice_size(dataset.get_size()))

    def sniff(self, filename):
        """
        SNAP model files start with zoeHMM

        Return True when the first six bytes of ``filename`` are ``zoeHMM``.
        """
        # Read raw bytes: sniffers may be handed arbitrary binary uploads, and
        # decoding those as text can raise instead of simply not matching.
        with open(filename, 'rb') as handle:
            return handle.read(6) == b'zoeHMM'


class Augustus(CompressedArchive):
    """
    Class describing an Augustus prediction model
    """
    file_ext = "augustus"
    compressed = True

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Fill in ``dataset.peek`` and ``dataset.blurb``.

        The archive content is not previewed: the peek is a fixed label and
        the blurb is the human-readable dataset size. ``is_multi_byte`` is
        accepted for signature compatibility and is not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Augustus model"
            dataset.blurb = nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """
        Return the stored peek, falling back to a generic size description
        if reading ``dataset.peek`` fails.
        """
        try:
            return dataset.peek
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt and
            # SystemExit are allowed to propagate.
            return "Augustus model (%s)" % (nice_size(dataset.get_size()))

    def sniff(self, filename):
        """
        Augustus archives always contain the same files

        Return True when ``filename`` is a tar archive whose first regular
        file has one of the known Augustus model file name suffixes, and
        False otherwise (including when inspecting the archive raises).
        """
        # str.endswith accepts a tuple, so one call covers every known suffix.
        model_suffixes = (
            '_exon_probs.pbl',
            '_igenic_probs.pbl',
            '_intron_probs.pbl',
            '_metapars.cfg',
            '_metapars.utr.cfg',
            '_parameters.cfg',
            '_parameters.cgp.cfg',
            '_utr_probs.pbl',
            '_weightmatrix.txt',
        )
        try:
            if filename and tarfile.is_tarfile(filename):
                with tarfile.open(filename, 'r') as temptar:
                    for member in temptar:
                        # Directories, links, etc. carry no information.
                        if not member.isfile():
                            continue
                        # NOTE(review): the first regular file alone decides
                        # the result; later members are never examined.
                        # Confirm this is the intended rule.
                        return member.name.endswith(model_suffixes)
        except Exception as e:
            log.warning('%s, sniff Exception: %s', self, e)
        return False


# Register the Augustus class as a sniffable binary format under the name
# "augustus". NOTE(review): the two string arguments are presumably the
# format name and the file extension -- confirm against
# Binary.register_sniffable_binary_format.
Binary.register_sniffable_binary_format("augustus", "augustus", Augustus)

0 comments on commit 86929ff

Please sign in to comment.