Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 162 lines (152 sloc) 5.898 kb
3d45bf5 @peterjc Updated Local Composition Complexity (LCC) code from Sebastian Bassi,…
peterjc authored
1 # Copyright 2003, 2007 by Sebastian Bassi. sbassi@genesdigitales.com
2 # All rights reserved. This code is part of the Biopython
3 # distribution and governed by its license.
4 # Please see the LICENSE file that should have been included as part
5 # of this package.
6
7 import math
8
def lcc_mult(seq, wsize):
    """Local Composition Complexity (LCC) values over sliding window.

    Returns a list whose first element is a placeholder 0 (kept for
    backward compatibility), followed by one LCC value for each position
    of a window of size wsize sliding over the sequence.

    seq - an unambiguous DNA sequence (a string or Seq object),
          upper or lower case
    wsize - window size, a positive integer

    Each LCC value is -sum(f * log2(f)) over the bases A, C, T and G,
    where f is the number of occurrences of the base in the window
    divided by wsize.  Characters other than A, C, G and T are ignored
    when counting.

    The result is the same as applying lcc_simp to every window, but
    this version is optimized for speed: the base counts of the previous
    window are updated with the base leaving and the base entering the
    window instead of recounting the whole window.

    Raises ValueError if wsize is smaller than 1.
    """
    if wsize < 1:
        raise ValueError("wsize must be a positive integer")
    try:
        # Assume it's a string
        upper = seq.upper()
    except AttributeError:
        # Should be a Seq object then
        upper = seq.tostring().upper()
    tamseq = len(upper)
    l2 = math.log(2)
    wfloat = float(wsize)
    # compone[k] is the entropy term (k/wsize) * log2(k/wsize) for a base
    # seen k times in a window; compone[0] is 0 so absent bases add nothing
    # (and log(0) is never evaluated).
    compone = [0]
    for k in range(1, wsize + 1):
        freq = k / wfloat
        compone.append(freq * (math.log(freq) / l2))
    # Count on the upper-cased sequence so lower-case input is handled.
    first = upper[0:wsize]
    counts = {'A': first.count('A'),
              'C': first.count('C'),
              'T': first.count('T'),
              'G': first.count('G')}
    current = -(compone[counts['A']] + compone[counts['C']] +
                compone[counts['T']] + compone[counts['G']])
    lccsal = [0, current]
    for x in range(tamseq - wsize):
        leaving = upper[x]            # base dropping out of the window
        entering = upper[x + wsize]   # base coming into the window
        if leaving != entering:
            # Only the counts of these two bases can change; anything
            # that is not A, C, T or G is not tracked.
            if leaving in counts:
                counts[leaving] -= 1
            if entering in counts:
                counts[entering] += 1
            current = -(compone[counts['A']] + compone[counts['C']] +
                        compone[counts['T']] + compone[counts['G']])
        # When both bases are equal the composition is unchanged and the
        # previous value is reused.
        lccsal.append(current)
    return lccsal
119
def lcc_simp(seq):
    """Local Composition Complexity (LCC) for a sequence.

    seq - an unambiguous DNA sequence (a string or Seq object),
          upper or lower case

    Returns the Local Composition Complexity (LCC) value for the entire
    sequence (as a float): -sum(f * log2(f)) over the bases A, C, T and
    G, where f is the count of the base divided by the sequence length.
    If none of the four bases occurs (for example for an empty
    sequence) the integer 0 is returned.

    Reference:
    Andrzej K Konopka (2005) Sequence Complexity and Composition
    DOI: 10.1038/npg.els.0005260
    """
    wsize = len(seq)
    try:
        # Assume it's a string
        upper = seq.upper()
    except AttributeError:
        # Should be a Seq object then
        upper = seq.tostring().upper()
    l2 = math.log(2)
    total = 0
    for base in "ACTG":
        # Count on the upper-cased sequence so that lower-case input is
        # not mistaken for "base absent".
        count = upper.count(base)
        # Skip absent bases to avoid calculating the log of 0.
        if count:
            freq = count / float(wsize)
            total += freq * (math.log(freq) / l2)
    return -total
Something went wrong with that request. Please try again.