-
Notifications
You must be signed in to change notification settings - Fork 4
/
hmmer2gff.py
executable file
·56 lines (49 loc) · 1.12 KB
/
hmmer2gff.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python
import argparse, sys
# Command-line interface: where the HMMER domain table comes from and which
# GFF flavour to write.
parser = argparse.ArgumentParser(
    description="Convert HMMER domain tabular output (--domtblout) to GFF or GFF3."
)
# read_hmmer() treats the literal value "stdin" as "read standard input", so
# using it as the default lets the script run as a filter when -i is omitted
# instead of failing on open(None).
parser.add_argument(
    "-i", "--input", type=str, default="stdin",
    help='HMMER tabular output for domains (--domtblout); '
         '"stdin" reads standard input [default=%(default)s]',
)
# Restricting the choices makes a mistyped format fail with a usage message
# rather than deep inside the conversion loop.
parser.add_argument(
    "-O", "--output-type", dest="output_type", type=str, default="gff",
    choices=("gff", "gff3"),
    help="<gff> <gff3> [default=%(default)s]",
)
args = parser.parse_args()
def read_hmmer(f, output_type):
    """Convert HMMER ``--domtblout`` rows to GFF/GFF3 records on stdout.

    Every row that is neither a ``#`` comment nor blank is split on
    whitespace and printed as one tab-separated, nine-column record:

    * column 1 of the row (target name) as the sequence id,
    * the constant source ``HMMER`` and feature type ``DOMAIN``,
    * columns 18 and 19 of the row (``ali from`` / ``ali to`` in the HMMER
      domtblout layout) as start and end, copied verbatim,
    * ``.`` for score, strand and phase,
    * an attribute string built from column 4 (query name) and, for GFF3,
      column 5 (query accession).

    Args:
        f: Path of the table to read. The literal string ``"stdin"`` or
            ``None`` reads from standard input instead.
        output_type: ``"gff"`` or ``"gff3"``.

    Returns:
        None. Records are written to standard output.

    Raises:
        ValueError: If ``output_type`` is not supported, or a data row has
            fewer than 19 whitespace-separated fields.
    """
    # Fail before touching any input: an unknown type would otherwise leave
    # the attribute string undefined for every row.
    if output_type not in ("gff", "gff3"):
        raise ValueError(
            "unsupported output type %r (expected 'gff' or 'gff3')"
            % (output_type,)
        )

    from_stdin = f is None or f == "stdin"
    handle = sys.stdin if from_stdin else open(f)
    try:
        for lineno, line in enumerate(handle, 1):
            if line.startswith("#"):
                continue
            fields = line.split()
            if not fields:
                # Blank lines carry no record.
                continue
            if len(fields) < 19:
                raise ValueError(
                    "line %d: expected at least 19 fields, found %d"
                    % (lineno, len(fields))
                )

            seqid = fields[0]
            query_name = fields[3]
            query_acc = fields[4]
            start = fields[17]
            end = fields[18]

            if output_type == "gff3":
                attributes = "Name=%s;ID=%s;" % (query_name, query_acc)
            else:
                # Both GFF tags deliberately carry the query name.
                attributes = 'gene_id "%s"; transcript_id "%s";' % (
                    query_name,
                    query_name,
                )

            record = (
                seqid,
                "HMMER",
                "DOMAIN",
                start,
                end,
                ".",
                ".",
                ".",
                attributes,
            )
            # Single-argument parenthesised print works on Python 2 and 3.
            print("\t".join(record))
    finally:
        # Only close what this function opened; sys.stdin belongs to the
        # process.
        if not from_stdin:
            handle.close()
    return
# Script entry point: run the conversion with the parsed command-line options.
if __name__ == "__main__":
    read_hmmer(f=args.input, output_type=args.output_type)