-
Notifications
You must be signed in to change notification settings - Fork 0
/
Features.py
78 lines (62 loc) · 1.97 KB
/
Features.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import re
import sys
import math
from Bio import SeqIO
"""
"""
# One-letter codes whose fractions CalculateAAComposition reports, in the
# order they are inserted into its result dict.
AA = list("ARNDCEQGHILKMFPSTWYV")
# Letters whose fractions CalculateNTFrequence reports, in insertion order.
NT = list("ATCG")
"""
"""
def CalculateAAComposition(aaseq):
    """Return the fraction of ``aaseq`` taken up by each code listed in ``AA``.

    Args:
        aaseq: Sequence to analyse; it must support ``.count()`` and ``len()``
            (the script passes a ``str``).

    Returns:
        A dict with one entry per element of ``AA``. A code that does not
        occur maps to the integer ``0``; every other code maps to
        ``count / len(aaseq)`` rounded to 5 decimal places.
    """
    composition = {}
    for residue in AA:
        occurrences = aaseq.count(residue)
        if occurrences == 0:
            # Absent codes are stored as a plain int 0 (not 0.0); this also
            # means an empty sequence never reaches the division below.
            composition[residue] = 0
        else:
            fraction = float(occurrences) / float(len(aaseq))
            composition[residue] = round(fraction, 5)
    return composition
"""
"""
def CalculateNTFrequence(ntseq):
    """Return the fraction of ``ntseq`` taken up by each letter listed in ``NT``.

    Args:
        ntseq: Sequence to analyse; it must support ``.count()`` and ``len()``
            (the script passes a ``str``).

    Returns:
        A dict with one entry per element of ``NT``. A letter that does not
        occur maps to the integer ``0``; every other letter maps to
        ``count / len(ntseq)`` rounded to 5 decimal places.
    """
    frequencies = {}
    for base in NT:
        hits = ntseq.count(base)
        value = 0  # stays an int 0 when the letter is absent
        if hits != 0:
            value = round(float(hits) / float(len(ntseq)), 5)
        frequencies[base] = value
    return frequencies
if __name__ == "__main__":
    # Command line: <script> <nucleotide FASTA> <amino-acid FASTA>
    # Writes one row per nucleotide record to 'Features.csv' in the current
    # working directory (no header row).
    if len(sys.argv) < 3:
        sys.exit("usage: %s <nucleotide.fasta> <protein.fasta>" % sys.argv[0])
    fnt = str(sys.argv[1])
    faa = str(sys.argv[2])

    # Sequences keyed by FASTA record id, with every 'X' character removed.
    fastant = {}
    fastaaa = {}

    # 'with' guarantees each handle is closed even if parsing raises.
    with open(fnt, "r") as handle:
        for record in SeqIO.parse(handle, "fasta"):
            fastant[record.id] = str(record.seq).replace('X', '')

    with open(faa, "r") as handle:
        for record in SeqIO.parse(handle, "fasta"):
            fastaaa[record.id] = str(record.seq).replace('X', '')

    with open('Features.csv', 'w') as output:
        for key in fastant:
            ntseq = fastant[key]
            # Midpoint used to split the nucleotide sequence into two halves;
            # for odd lengths the second half gets the extra character.
            medium = len(ntseq) // 2

            # Column groups, in output order:
            #   1. frequencies over the complete nucleotide sequence
            #   2. frequencies over the first half
            #   3. frequencies over the second half
            #   4. composition of the amino-acid record with the same id
            # A nucleotide id that is missing from the amino-acid file raises
            # KeyError here, before anything is written for that row.
            groups = (
                CalculateNTFrequence(ntseq),
                CalculateNTFrequence(ntseq[:medium]),
                CalculateNTFrequence(ntseq[medium:]),
                CalculateAAComposition(fastaaa[key]),
            )

            row = [key]
            for res in groups:
                # Values are emitted in the dict's own iteration order.
                row.extend(str(res[r]) for r in res)
            output.write(','.join(row) + '\n')