-
Notifications
You must be signed in to change notification settings - Fork 7
/
Coverage_get.py
43 lines (43 loc) · 1.55 KB
/
Coverage_get.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import argparse
# Command-line interface. Parsing happens at module level, so ``args`` is
# available to the ``__main__`` section at the bottom of the file.
parser = argparse.ArgumentParser(description='Calculate the coverage for each gene number')
# Every path below is consumed unconditionally by the script (the TSV path is
# printed, and all three are opened by ``cal``), so each option is marked
# required: a missing one now produces an argparse usage error instead of a
# later TypeError on ``None``.
parser.add_argument('-i','--orf2gene',required=True,help='orf2gene file')
parser.add_argument('-t','--tsv',required=True,help='Coverage of all orfs which is generated by CheckM')
parser.add_argument('-o','--out',required=True,help='output file name')
args = parser.parse_args()
#def cal(orf2gene,tsv,out):
def cal(tsv,orf2gene,out):
    """Sum per-ORF coverage into per-gene totals and write them to ``out``.

    Args:
        tsv: Path to a tab-separated file; column 0 is the ORF identifier
            and column 3 (zero-based) is its coverage value.
        orf2gene: Path to a tab-separated mapping file; column 0 is the ORF
            identifier and column 1 is the gene name it belongs to.
        out: Path of the file to write. One ``gene<TAB>total`` line is
            written per gene, sorted by gene name.

    ORFs whose coverage equals zero are ignored, so a gene that only has
    zero-coverage ORFs does not appear in the output. ORFs listed in
    ``orf2gene`` but missing from ``tsv`` are reported on stdout and skipped.
    Rows with too few columns are reported and skipped; blank lines are
    skipped silently.

    Raises:
        ValueError: If a coverage value that is looked up is not numeric.
        OSError: If any of the files cannot be opened.
    """
    # ORF id -> coverage text. If an ORF id repeats, the last row wins.
    orf2cov = {}
    with open(tsv, 'r') as tsvfile:
        for orf in tsvfile:
            fields = orf.split('\t')
            if len(fields) < 4:
                # Indexing a short row raises IndexError (not KeyError), so
                # check the column count explicitly instead of catching.
                if orf.strip():
                    print(orf + 'cannot be splited!')
                continue
            orf2cov[fields[0]] = fields[3]

    # gene name -> running coverage total. Keying on the exact gene name
    # avoids prefix collisions (e.g. "COX1" vs "COX10") that a startswith()
    # match on "gene~coverage" strings would produce.
    gene2cov = {}
    with open(orf2gene, 'r') as mapfile:
        for key in mapfile:
            fields = key.split('\t')
            if len(fields) < 2:
                if key.strip():
                    print(key + 'cannot be splited!')
                continue
            ORF = fields[0]
            gene = fields[1].strip()
            try:
                coverage = float(orf2cov[ORF])
            except KeyError:
                print(ORF + ' cannot be found in quanf.sf!')
                continue
            if coverage != 0:
                gene2cov[gene] = gene2cov.get(gene, 0) + coverage

    # The context manager guarantees the output file is closed even if a
    # write fails part-way through.
    with open(out, 'w') as output:
        for gene in sorted(gene2cov):
            output.write(gene + '\t' + str(gene2cov[gene]) + '\n')
if __name__ == '__main__':
    # Script entry point: announce which coverage table is being processed,
    # then aggregate it into per-gene totals.
    coverage_table = args.tsv
    print('Dealing with '+coverage_table)
    cal(coverage_table,args.orf2gene,args.out)