/
annotation.py
85 lines (71 loc) · 2.83 KB
/
annotation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import logging
import tarfile
from galaxy.datatypes.binary import Binary, CompressedArchive
from galaxy.datatypes.data import get_file_peek, Text
from galaxy.util import nice_size
# Module-level logger named after this module; Augustus.sniff uses it to
# report exceptions raised while probing a candidate file.
log = logging.getLogger(__name__)
class SnapHmm(Text):
    """
    Datatype for SNAP HMM model files (extension ``snaphmm``).

    Files are recognised by the literal ``zoeHMM`` marker at the very start
    of the file (see :meth:`sniff`).
    """
    file_ext = "snaphmm"

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Populate ``dataset.peek`` and ``dataset.blurb``.

        For a dataset that has not been purged, the peek is produced by
        ``get_file_peek`` from ``dataset.file_name`` and the blurb is a fixed
        description. For a purged dataset both fields get placeholder text.

        :param dataset: dataset whose ``peek``/``blurb`` attributes are set
        :param is_multi_byte: forwarded unchanged to ``get_file_peek``
        """
        if not dataset.dataset.purged:
            dataset.peek = get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            dataset.blurb = "SNAP HMM model"
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disc'

    def display_peek(self, dataset):
        """
        Return the stored peek, falling back to a size-based description.

        :param dataset: dataset to describe
        :returns: ``dataset.peek`` when it can be read, otherwise
            ``"SNAP HMM model (<size>)"`` built with ``nice_size``
        """
        try:
            return dataset.peek
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt and
            # SystemExit are deliberately allowed to propagate.
            return "SNAP HMM model (%s)" % (nice_size(dataset.get_size()))

    def sniff(self, filename):
        """
        SNAP model files start with zoeHMM

        :param filename: path of the file to inspect
        :returns: True when the first six bytes of the file are ``zoeHMM``,
            False otherwise (including for files shorter than six bytes)
        """
        # Read raw bytes: the sniffer is handed arbitrary files, and decoding
        # binary content in text mode can raise UnicodeDecodeError instead of
        # simply not matching.
        with open(filename, 'rb') as handle:
            return handle.read(6) == b'zoeHMM'
class Augustus(CompressedArchive):
    """
    Class describing an Augustus prediction model

    The model is stored as a tar archive (extension ``augustus``); see
    :meth:`sniff` for how such an archive is recognised.
    """
    file_ext = "augustus"
    compressed = True

    # File-name endings that sniff() accepts as evidence of an Augustus model.
    _SNIFF_SUFFIXES = (
        '_exon_probs.pbl',
        '_igenic_probs.pbl',
        '_intron_probs.pbl',
        '_metapars.cfg',
        '_metapars.utr.cfg',
        '_parameters.cfg',
        '_parameters.cgp.cfg',
        '_utr_probs.pbl',
        '_weightmatrix.txt',
    )

    def set_peek(self, dataset, is_multi_byte=False):
        """
        Populate ``dataset.peek`` and ``dataset.blurb``.

        For a dataset that has not been purged, the peek is a fixed
        description and the blurb is the human-readable dataset size. For a
        purged dataset both fields get placeholder text.

        :param dataset: dataset whose ``peek``/``blurb`` attributes are set
        :param is_multi_byte: accepted for signature compatibility; not used
        """
        if not dataset.dataset.purged:
            dataset.peek = "Augustus model"
            dataset.blurb = nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """
        Return the stored peek, falling back to a size-based description.

        :param dataset: dataset to describe
        :returns: ``dataset.peek`` when it can be read, otherwise
            ``"Augustus model (<size>)"`` built with ``nice_size``
        """
        try:
            return dataset.peek
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt and
            # SystemExit are deliberately allowed to propagate.
            return "Augustus model (%s)" % (nice_size(dataset.get_size()))

    def sniff(self, filename):
        """
        Augustus archives always contain the same files

        The decision is made on the first regular-file member of the tar
        archive: the result is True when that member's name ends with one of
        the known Augustus file suffixes, and False otherwise. Directories
        and other non-file members are skipped.

        :param filename: path of the file to inspect
        :returns: True if the file looks like an Augustus model archive;
            False if it is not a tar file, contains no regular file, its
            first regular file has an unexpected name, or any error occurs
            while reading it (the error is logged as a warning)
        """
        try:
            if filename and tarfile.is_tarfile(filename):
                with tarfile.open(filename, 'r') as temptar:
                    for member in temptar:
                        if not member.isfile():
                            continue
                        # str.endswith accepts a tuple of alternatives, so a
                        # single call covers every known suffix.
                        return member.name.endswith(self._SNIFF_SUFFIXES)
        except Exception as e:
            # Best-effort detection: a damaged or unreadable archive must not
            # abort sniffing, it just does not match.
            log.warning('%s, sniff Exception: %s', self, e)
        return False
# Register the Augustus class with Binary's sniffable-format registry; both
# string arguments are "augustus", the same value as Augustus.file_ext.
# NOTE(review): presumably this is what makes Augustus.sniff run during
# binary datatype detection - confirm against Binary's implementation.
Binary.register_sniffable_binary_format("augustus", "augustus", Augustus)