-
Notifications
You must be signed in to change notification settings - Fork 1.7k
/
xbb_translations.py
124 lines (100 loc) · 4.72 KB
/
xbb_translations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env python
# Created: Wed Jun 21 15:53:22 2000
# Last changed: Time-stamp: <00/12/02 15:56:27 thomas>
# thomas@cbs.dtu.dk, http://www.cbs.dtu.dk/thomas
# File: xbb_translations.py
import sys
import time
sys.path.insert(0, '.')
from Tkinter import *
from Bio.Seq import reverse_complement, translate
from Bio.SeqUtils import GC
class xbb_translations:
    """Translate nucleotide sequences and format the results as plain text.

    Instances hold no state. The sequence work is delegated to
    ``Bio.Seq.translate``, ``Bio.Seq.reverse_complement`` and
    ``Bio.SeqUtils.GC``; this class adds frame selection and text layout.
    """

    def __init__(self):
        """Create a translator; there is nothing to initialise."""

    def frame1(self, seq, translation_table=1):
        """Translate *seq* from its first base.

        *translation_table* is passed to ``translate`` as ``table``.
        """
        return translate(seq, table=translation_table)

    def complement(self, seq):
        """Return the complement of *seq* in the same left-to-right order."""
        # TODO - use Seq methods instead of this hack:?
        # reverse_complement() also reverses, so reverse once more to undo it.
        return reverse_complement(seq)[::-1]

    def reverse(self, seq):
        """Return *seq* reversed."""
        return seq[::-1]

    def antiparallel(self, seq):
        """Return the reverse complement of *seq*."""
        return reverse_complement(seq)

    def frame(self, seq, frame, translation_table=1):
        """Translate *seq* in the requested reading frame.

        *frame* is 1, 2 or 3 for the given strand and -1, -2 or -3 for the
        reverse complement; ``abs(frame) - 1`` leading bases are skipped
        before translating. This is the same convention ``gcframe`` uses to
        build its six frames, except that the result is returned in
        translation order (``gcframe`` reverses its minus frames for display).

        Any other value of *frame* silently falls back to frame 1.
        """
        if not ((-3 <= frame <= -1) or (1 <= frame <= 3)):
            frame = 1
        if frame < 0:
            seq = reverse_complement(seq)
        return translate(seq[abs(frame) - 1:], table=translation_table)

    def header_nice(self, txt, seq):
        """Return a text header for *seq*, labelled with *txt*.

        The header contains the label, the current local time, the counts of
        A/T/G/C, a shortened lower-cased copy of the sequence (first and last
        ten symbols when it is longer than 20), its length and its GC
        percentage, followed by blank lines.
        """
        length = len(seq)
        if length > 20:
            short = '%s ... %s' % (seq[:10], seq[-10:])
        else:
            short = seq

        date = time.strftime('%y %b %d, %X', time.localtime(time.time()))
        parts = ['%s: %s, ' % (txt, date)]
        # Only upper-case symbols are counted, so lower-case input reports 0.
        for nt in 'atgc':
            parts.append('%s:%d ' % (nt, seq.count(nt.upper())))
        parts.append('\nSequence: %s, %d nt, %0.2f %%GC\n'
                     % (short.lower(), length, self.gc(seq)))
        parts.append('\n\n')
        return ''.join(parts)

    def frame_nice(self, seq, frame, translation_table=1):
        """Return a text view of *seq* with one translation above it.

        The sequence is printed 60 nt per row, each row preceded by its
        1-based nucleotide/residue position and the matching 20 residues of
        ``self.frame(seq, frame, translation_table)``, and followed by the
        row's GC percentage. The header label is always
        'Plus one frame translation', whatever *frame* is.
        """
        length = len(seq)
        protein = self.frame(seq, frame, translation_table)
        res = self.header_nice('Plus one frame translation', seq)
        for i in range(0, length, 60):
            subseq = seq[i:i + 60]
            # Floor division: p is used as a slice index.
            p = i // 3
            res += '%d/%d\n' % (i + 1, p + 1)
            # Two spaces between residues gives a 3-column pitch, one codon.
            res += '  '.join(protein[p:p + 20]) + '\n'
            # seq
            res += subseq.lower() + '%5d %%\n' % int(self.gc(subseq))
        return res

    def gc(self, seq):
        """Returns a float between 0 and 100."""
        return GC(seq)

    def gcframe(self, seq, translation_table=1):
        """Return a six-frame text view of *seq*.

        For every 60 nt row the output holds the position line, the three
        plus-frame translations, the sequence with its GC percentage, the
        complement strand and the three minus-frame translations.
        """
        # always use uppercase nt-sequence !!
        comp = self.complement(seq)
        anti = self.reverse(comp)
        length = len(seq)

        frames = {}
        for i in range(0, 3):
            frames[i + 1] = self.frame1(seq[i:], translation_table)
            # Minus frames are stored reversed for display under `comp`.
            frames[-(i + 1)] = self.reverse(
                self.frame1(anti[i:], translation_table))

        res = self.header_nice('GCFrame', seq)
        for i in range(0, length, 60):
            subseq = seq[i:i + 60]
            csubseq = comp[i:i + 60]
            # Floor division: p is used as a slice index.
            p = i // 3
            # + frames
            # Residues are spaced at a 3-column pitch (one codon each);
            # frames 2 and 3 are shifted right by 1 and 2 columns.
            res += '%d/%d\n' % (i + 1, p + 1)
            res += '  ' + '  '.join(frames[3][p:p + 20]) + '\n'
            res += ' ' + '  '.join(frames[2][p:p + 20]) + '\n'
            res += '  '.join(frames[1][p:p + 20]) + '\n'
            # seq
            res += subseq.lower() + '%5d %%\n' % int(self.gc(subseq))
            res += csubseq.lower() + '\n'
            # - frames
            # Same pitch; each minus-frame row has its own column offset.
            res += '  '.join(frames[-2][p:p + 20]) + ' \n'
            res += ' ' + '  '.join(frames[-1][p:p + 20]) + '\n'
            res += '  ' + '  '.join(frames[-3][p:p + 20]) + '\n\n'
        return res
if __name__ == '__main__':
    # Manual smoke test: print the six-frame view of a sample sequence.
    # Shorter alternative sample:
    # s = 'GCCCTTTCTTATTAGTGCTACCGCTAATAGGTAAATATGAAAAACCTTTG'
    s = 'ATTCCGGTTGATCCTGCCGGACCCGACCGCTATCGGGGTAGGGATAAGCCATGGGAGTCTTACACTCCCGGGTAAGGGAGTGTGGCGGACGGCTGAGTAACACGTGGCTAACCTACCCTCGGGACGGGGATAACCCCGGGAAACTGGGGATAATCCCCGATAGGGAAGGAGTCCTGGAATGGTTCCTTCCCTAAAGGGCTATAGGCTATTTCCCGTTTGTAGCCGCCCGAGGATGGGGCTACGGCCCATCAGGCTGTCGGTGGGGTAAAGGCCCACCGAACCTATAACGGGTAGGGGCCGTGGAAGCGGGAGCCTCCAGTTGGGCACTGAGACAAGGGCCCAGGCCCTACGGGGCGCACCAGGCGCGAAACGTCCCCAATGCGCGAAAGCGTGAGGGCGCTACCCCGAGTGCCTCCGCAAGGAGGCTTTTCCCCGCTCTAAAAAGGCGGGGGAATAAGCGGGGGGCAAGTCTGGTGTCAGCCGCCGCGGTAATACCAGCTCCGCGAGTGGTCGGGGTGATTACTGGGCCTAAAGCGCCTGTAGCCGGCCCACCAAGTCGCCCCTTAAAGTCCCCGGCTCAACCGGGGAACTGGGGGCGATACTGGTGGGCTAGGGGGCGGGAGAGGCGGGGGGTACTCCCGGAGTAGGGGCGAAATCCTTAGATACCGGGAGGACCACCAGTGGCGGAAGCGCCCCGCTA'
    test = xbb_translations()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('============================================================')
    print(test.gcframe(s))