/
BitSetDNASequence.java
203 lines (168 loc) · 4.55 KB
/
BitSetDNASequence.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
package dnaModels;
import java.util.BitSet;
import siteModels.CodonUtils;
import siteModels.CodonUtils.AminoAcid;
import mutationModels.MutationModel;
import cern.jet.random.Uniform;
import cern.jet.random.engine.RandomEngine;
/**
* An implementation of DNA sequence that uses two bitsets to store the data. Currently the fastest model.
* @author brendan
*
*/
public class BitSetDNASequence extends DNASequence {
BitSet bits1;
BitSet bits2;
BitSetPool pool = null;
RandomEngine rng;
/**
* Constucts a new DNA sequence using the stationary frequencies from the given mutation model
* @param rng A random number generator
* @param theLength The length of this DNA sequence
* @param mm The mutation model associated with this DNA sequence.
*/
public BitSetDNASequence(RandomEngine rng, int theLength, MutationModel mm) {
super(rng, theLength, mm);
this.rng = rng;
bits1 = new BitSet(length);
bits2 = new BitSet(length);
Uniform uniGen = new Uniform(rng);
for(int i=0; i<length; i++) {
double r = uniGen.nextDouble();
if (r<mm.getPiA())
setBaseChar(i, 'A');
else if (r<(mm.getPiA()+mm.getPiG()))
setBaseChar(i, 'G');
else if (r<(mm.getPiA()+mm.getPiG()+mm.getPiC()))
setBaseChar(i, 'C');
else
setBaseChar(i, 'T');
}
}
/**
* This constructor makes a DNA sequence that looks sort of like an ORF, it starts with a start and contains no stop codons
* @param rng
* @param theLength
* @param mm
* @param useCodonsFlag
*/
public BitSetDNASequence(RandomEngine rng, int theLength, MutationModel mm, CodonUtils useCodonsFlag) {
super(rng, theLength, mm);
this.rng = rng;
bits1 = new BitSet(length);
bits2 = new BitSet(length);
Uniform uniGen = new Uniform(rng);
//First make a string with a bunch of non-stop codons
StringBuilder seqStr = new StringBuilder();
seqStr.append("ATG");
while(seqStr.length() < theLength) {
String codon = makeNewCodon(mm, uniGen);
seqStr.append(codon);
}
//Then actually set the bits based on the characters in the string
for(int i=0; i<length; i++) {
setBaseChar(i, seqStr.charAt(i));
}
}
private BitSetDNASequence(BitSet b1, BitSet b2, int theLength, MutationModel mm /* BitSetPool pool */) {
super(null, theLength, mm);
bits1 = b1;
bits2 = b2;
mutationModel = mm;
this.length = theLength;
}
/**
* Returns a char representing the base at a given position
* @param which The site to get the char for
* @return A char representing the base
*/
public char getBaseChar(int which) {
if (bits1.get(which)) {
if (bits2.get(which)) return 'A';
else return 'G';
}
else {
if (bits2.get(which)) return 'C';
else return 'T';
}
}
/**
* Sets a DNA sequence based at a particular site based on a char representing the base
* @param which The site to set the base at
* @param base The base (A,C,G,or T), must be in uppercase
*/
public void setBaseChar(int which, char base) {
switch(base) {
case 'A' : bits1.set(which, true);
bits2.set(which, true);
break;
case 'G' : bits1.set(which, true);
bits2.set(which, false);
break;
case 'C' : bits1.set(which, false);
bits2.set(which, true);
break;
case 'T' : bits1.set(which, false);
bits2.set(which, false);
break;
}
}
/**
* Clones this DNA sequence, but does not deep-copy anything else, such as the mutation model.
* Something like 50% of the simulation run time is spent here.
*/
public Object clone() {
BitSet newB1;
BitSet newB2;
if (pool!=null) {
newB1 = pool.getNew();
newB1.clear();
newB1.or(bits1);
newB2 = pool.getNew();
newB2.clear();
newB2.or(bits2);
}
else {
newB1 = (BitSet)bits1.clone();
newB2 = (BitSet)bits2.clone();
}
return new BitSetDNASequence(newB1, newB2, length, mutationModel);
}
/**
* Used in the experimental BitSetPool implementation, which is currently not in use
*/
public void retireBits() {
if (pool!=null) {
pool.retire(bits1);
pool.retire(bits2);
}
}
public BitSet getBits1() {
return bits1;
}
public BitSet getBits2() {
return bits2;
}
public DNASequence getCopy() {
return (DNASequence)this.clone();
}
public Double getDoubleValue() {
return null;
}
/**
* A string representation of this sequence
*/
public String getStringValue() {
StringBuffer buf = new StringBuffer();
for(int i=0; i<length; i++) {
buf.append( getBaseChar(i));
}
return buf.toString();
}
/**
* OK, I guess we can just use toString as well
*/
public String toString() {
return getStringValue();
}
}