Permalink
Browse files

fixed a bug that caused a wrong offset when parsing SCF files

fixed the error in the comments parser
added support for code set "2" (IUPAC)

Signed-off-by: Hannes Brandstätter-Müller <hannes.mueller@gmail.com>
  • Loading branch information...
brandstaetter committed Apr 25, 2013
1 parent c6b4d9d commit 55795d87087c4f92b7d6a0882103850c9c5f6863
@@ -256,41 +256,56 @@ public DefaultUncertaintyDecoder() { }
/**
 * Decodes one base-call byte into a DNA symbol.
 *
 * <p>The byte is interpreted as a character. The letters {@code a c g t n m r w s y k v h d b}
 * are accepted in either case and each is mapped to the {@code DNATools} accessor of the same
 * name. Both {@code '-'} and {@code '.'} are mapped to the gap symbol of the DNA alphabet.
 *
 * @param call the raw base-call byte
 * @return the symbol corresponding to {@code call}
 * @throws IllegalSymbolException if {@code call} is not one of the characters listed above
 */
public Symbol decode(byte call) throws IllegalSymbolException {
    char c = (char) call;
    switch (c) {
        case 'a':
        case 'A':
            return DNATools.a();
        case 'c':
        case 'C':
            return DNATools.c();
        case 'g':
        case 'G':
            return DNATools.g();
        case 't':
        case 'T':
            return DNATools.t();
        case 'n':
        case 'N':
            return DNATools.n();
        case 'm':
        case 'M':
            return DNATools.m();
        case 'r':
        case 'R':
            return DNATools.r();
        case 'w':
        case 'W':
            return DNATools.w();
        case 's':
        case 'S':
            return DNATools.s();
        case 'y':
        case 'Y':
            return DNATools.y();
        case 'k':
        case 'K':
            return DNATools.k();
        case 'v':
        case 'V':
            return DNATools.v();
        case 'h':
        case 'H':
            return DNATools.h();
        case 'd':
        case 'D':
            return DNATools.d();
        case 'b':
        case 'B':
            return DNATools.b();
        // Two spellings of a gap are accepted: dash and period.
        case '-':
        case '.':
            return DNATools.getDNA().getGapSymbol();
        default:
            throw new IllegalSymbolException("No Symbol for " + c);
    }
}
}
@@ -339,7 +354,7 @@ public Symbol decode(byte call) throws IllegalSymbolException {
*/
private static BaseCallUncertaintyDecoder createDecoder(long
codeSet) {
if (codeSet != 0 && codeSet != 4)
if (codeSet != 0 && codeSet != 2 && codeSet != 4)
System.err.println("Warning: the code set (" + codeSet +
") is not specifically supported. (It may still work, though.)");
return new DefaultUncertaintyDecoder();
@@ -358,7 +373,7 @@ public void parse() throws IOException,
BASES = new Integer(1),
COMMENTS = new Integer(2),
PRIVATE = new Integer(3);
TreeMap sectionOrder = new TreeMap();
TreeMap<Long, Integer> sectionOrder = new TreeMap<Long, Integer>();
sectionOrder.put(new Long(header.samples_offset), SAMPLES);
sectionOrder.put(new Long(header.bases_offset), BASES);
if (header.comments_size > 0) {
@@ -368,9 +383,9 @@ public void parse() throws IOException,
sectionOrder.put(new Long(header.private_offset), PRIVATE);
}
for (Iterator it = sectionOrder.keySet().iterator() ;
for (Iterator<Long> it = sectionOrder.keySet().iterator() ;
it.hasNext() ;) {
Integer sect = (Integer) sectionOrder.get(it.next());
Integer sect = sectionOrder.get(it.next());
if (sect == SAMPLES) parseSamples();
else if (sect == BASES) parseBases();
else if (sect == COMMENTS) parseComments();
@@ -387,7 +402,8 @@ protected abstract void parseBases() throws IOException,
protected void parseComments() throws IOException {
skipTo(header.comments_offset);
byte[] raw = new byte[(int)header.comments_size - 1];
din.read(raw, 0, raw.length);
int read = din.read(raw, 0, raw.length);
offset += read;
BufferedReader r = new BufferedReader(
new InputStreamReader(
new ByteArrayInputStream(raw),
@@ -441,7 +457,7 @@ protected final void skipTo(long newOffset) throws IOException {
protected final void createAndSetBaseCallAlignment(List dna, List
offsets, List[] probs) {
try {
Map baseCalls = new SmallMap(9);
Map<Object, SymbolList> baseCalls = new SmallMap(9);
baseCalls.put(Chromatogram.DNA,
out.createImmutableSymbolList(DNATools.getDNA(), dna));
baseCalls.put(Chromatogram.OFFSETS,
@@ -608,10 +624,12 @@ protected void parseBases() throws IOException,
skipTo(header.bases_offset);
int count = (int) header.bases;
List[] probs = new ArrayList[7];
for (int i = 0 ; i < 7 ; i++) probs[i] = new ArrayList(count);
List offsets = new ArrayList(count);
List dna = new ArrayList(count);
List<IntegerAlphabet.IntegerSymbol> offsets = new ArrayList<IntegerAlphabet.IntegerSymbol>(count);
@SuppressWarnings("unchecked")
List<IntegerAlphabet.IntegerSymbol>[] probs = new ArrayList[7];
for (int i = 0; i < 7; i++)
probs[i] = new ArrayList<IntegerAlphabet.IntegerSymbol>(count);
List<Symbol> dna = new ArrayList<Symbol>(count);
long tmp;
try {
@@ -24,6 +24,7 @@
import junit.framework.TestCase;
import org.biojava.bio.chromatogram.UnsupportedChromatogramFormatException;
import org.junit.Assert;
public class SCFTest extends TestCase {
@@ -37,4 +38,19 @@ public void testNormal() throws UnsupportedChromatogramFormatException, IOExcept
SCF.create(resource.openStream(), 0L);
}
/**
 * Parses the {@code offset.scf} test resource and checks that the upper-cased DNA base calls
 * equal the expected sequence.
 *
 * <p>NOTE(review): judging by the method and resource names this is presumably the regression
 * test for a wrong-offset parsing bug; confirm against the issue history.
 *
 * @throws UnsupportedChromatogramFormatException if the resource is not recognised as SCF
 * @throws IOException if the resource cannot be read
 */
public void testOffsetWrong() throws UnsupportedChromatogramFormatException, IOException {
    final URL resource = getClass().getResource("offset.scf");
    // Fail with a readable message instead of a NullPointerException when the file is missing.
    Assert.assertNotNull("Test resource offset.scf was not found on the classpath", resource);
    // Fully qualified so that this method does not require an additional import.
    final java.io.InputStream in = resource.openStream();
    try {
        SCF scf = SCF.create(in, 0L);
        String expected = "NNNNNNNNGNNNCNNNAGGNNGTCGTCTGCTGCTCGATGTCCTACACATGGACAGGCGCCTTGATNACACCATGCGCCGCGGA"
                + "GGAGAGCAAGCTGCCCATCAATGCGCTGAGCAACTCTTTGCTGCGTAACCATAACATGGTCTATGCCACAACATCCCGCAGCGCAAGCCAACG"
                + "GCAGAAGAAGGTTACCTTTGACAGACTGCAAGTCCTGGACGATCACTACCGGGACGTGCTTAAGGAGGTGAAGGCGAAGGCGTCCACAGTTAA"
                + "GGCTAAACTTCTATCTGTAGAAGAAGCCTGTAAACTGACGCCCCCACATTCGGCCAGATCTAAATTTGGCTATGGGGCAAAGGACGTCCGGAA"
                + "CCTATCCAGCAAGGCCGTTAACCACATCCGCTCCGTGTGGAAGGACTTGCTGGAAGACACTGAGACACCAATTGACACTACCATCATGGCAAA"
                + "AAATGAGGTTTTCTGCGTCCAACCAGAGAAAGGAGGCCGCAAGCCAGCACGCCTTATCGTATTCCCAGATCTGGGAGTTCGTGTGTGCGAGAA"
                + "AATGGCCCTTTATGACGTGGTCTCCACCCTTCCTCAGGCCGTGATGGGCTCCTCATACGGATTCCAGTACTCTCCTGGACAGCGGGTCGAGTT"
                + "CCTGGTGAATGCCTGGAAATCAAAGANAAANCCCCATGGGGTTCTCATATGACACCCGCTGTTTTGACTCAACGGTCACCGAGAGTGATATCC"
                + "GTGTTGAGGAGTCAATTTACCNATGTTGTGACTTGGCCCCCGAAGCCNGACAGGCNATAANN";
        Assert.assertEquals(expected, scf.getBaseCalls().symbolListForLabel(SCF.DNA).seqString().toUpperCase());
    } finally {
        // Release the resource stream whether parsing or the assertion succeeds or fails.
        in.close();
    }
}
}

0 comments on commit 55795d8

Please sign in to comment.