Permalink
Browse files

Switch BLAST XML get_raw to be line based.

This is primarily to ensure that get_raw via Bio.SearchIO.index(...),
which uses this method, matches get_raw via Bio.SearchIO.index_db(...),
which instead uses the record length computed during indexing.

If we think of the BLAST XML file as being line-oriented, then including
the leading and trailing whitespace of the <Iteration> and </Iteration>
lines makes sense. However, in XML this is technically meaningless free
whitespace.

This fixes the error/warning in test_SearchIO_index.py
  • Loading branch information...
1 parent c7a5ec6 commit 8f9e72b002cc8252716ee8a043e273291af082c3 @peterjc peterjc committed Dec 3, 2012
Showing with 18 additions and 24 deletions.
  1. +10 −20 Bio/SearchIO/BlastIO/blast_xml.py
  2. +8 −4 Tests/test_SearchIO_index.py
@@ -560,28 +560,18 @@ def _parse(self, handle):
def get_raw(self, offset):
qend_mark = self.qend_mark
- block_size = self.block_size
handle = self._handle
handle.seek(offset)
- counter = 0
- qresult_raw = _as_bytes('')
-
- while True:
- block = handle.read(block_size)
-
- # if we reach EOF without encountering any query end mark
- if not block:
- raise ValueError("Query end not found")
-
- qresult_raw += block
- qend_idx = qresult_raw.find(qend_mark)
-
- # if a match is found, return the raw qresult string
- if qend_idx > 0:
- return qresult_raw[:qend_idx + len(qend_mark)]
- # otherwise, increment the counter and go on to the next iteration
- counter += 1
-
+
+ qresult_raw = handle.readline()
+ assert qresult_raw.lstrip().startswith(self.qstart_mark)
+ while qend_mark not in qresult_raw:
+ qresult_raw += handle.readline()
+ assert qresult_raw.rstrip().endswith(qend_mark)
+ assert qresult_raw.count(qend_mark) == 1
+ # Note this will include any leading and trailing whitespace, in
+ # general expecting " <Iteration>\n...\n </Iteration>\n"
+ return qresult_raw
class _BlastXmlGenerator(XMLGenerator):
"""Event-based XML Generator."""
@@ -75,7 +75,8 @@ def test_blastxml_2226_multiple_first(self):
</Statistics>
</Iteration_stat>
<Iteration_message>No hits found</Iteration_message>
- </Iteration>"""
+ </Iteration>
+"""
self.check_raw(filename, "random_s00", raw)
def test_blastxml_2226_multiple_middle(self):
@@ -239,7 +240,8 @@ def test_blastxml_2226_multiple_middle(self):
<Statistics_entropy>0.14</Statistics_entropy>
</Statistics>
</Iteration_stat>
- </Iteration>"""
+ </Iteration>
+"""
self.check_raw(filename, "gi|16080617|ref|NP_391444.1|", raw)
def test_blastxml_2226_multiple_last(self):
@@ -498,7 +500,8 @@ def test_blastxml_2226_multiple_last(self):
<Statistics_entropy>0.14</Statistics_entropy>
</Statistics>
</Iteration_stat>
- </Iteration>"""
+ </Iteration>
+"""
self.check_raw(filename, "gi|11464971:4-101", raw)
def test_blastxml_2226_single(self):
@@ -757,7 +760,8 @@ def test_blastxml_2226_single(self):
<Statistics_entropy>0.14</Statistics_entropy>
</Statistics>
</Iteration_stat>
- </Iteration>"""
+ </Iteration>
+"""
self.check_raw(filename, "gi|11464971:4-101", raw)

0 comments on commit 8f9e72b

Please sign in to comment.