Skip to content

Commit

Permalink
Switch BLAST XML get_raw to be line based.
Browse files Browse the repository at this point in the history
This is primarily to ensure that get_raw via Bio.SearchIO.index(...), which
uses this method, matches get_raw via Bio.SearchIO.index_db(...), which
instead uses the record length computed during indexing.

If we think of the BLAST XML file as being line-oriented, then including
the leading and trailing whitespace of the <Iteration> and </Iteration>
lines makes sense. However, in XML this is technically meaningless free
whitespace.

This fixes the error/warning in test_SearchIO_index.py
  • Loading branch information
peterjc committed Dec 3, 2012
1 parent c7a5ec6 commit 8f9e72b
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 24 deletions.
30 changes: 10 additions & 20 deletions Bio/SearchIO/BlastIO/blast_xml.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -560,28 +560,18 @@ def _parse(self, handle):


def get_raw(self, offset): def get_raw(self, offset):
qend_mark = self.qend_mark qend_mark = self.qend_mark
block_size = self.block_size
handle = self._handle handle = self._handle
handle.seek(offset) handle.seek(offset)
counter = 0
qresult_raw = _as_bytes('') qresult_raw = handle.readline()

assert qresult_raw.lstrip().startswith(self.qstart_mark)
while True: while qend_mark not in qresult_raw:
block = handle.read(block_size) qresult_raw += handle.readline()

assert qresult_raw.rstrip().endswith(qend_mark)
# if we reach EOF without encountering any query end mark assert qresult_raw.count(qend_mark) == 1
if not block: # Note this will include any leading and trailing whitespace, in
raise ValueError("Query end not found") # general expecting " <Iteration>\n...\n </Iteration>\n"

return qresult_raw
qresult_raw += block
qend_idx = qresult_raw.find(qend_mark)

# if a match is found, return the raw qresult string
if qend_idx > 0:
return qresult_raw[:qend_idx + len(qend_mark)]
# otherwise, increment the counter and go on to the next iteration
counter += 1



class _BlastXmlGenerator(XMLGenerator): class _BlastXmlGenerator(XMLGenerator):
"""Event-based XML Generator.""" """Event-based XML Generator."""
Expand Down
12 changes: 8 additions & 4 deletions Tests/test_SearchIO_index.py
Original file line number Original file line Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ def test_blastxml_2226_multiple_first(self):
</Statistics> </Statistics>
</Iteration_stat> </Iteration_stat>
<Iteration_message>No hits found</Iteration_message> <Iteration_message>No hits found</Iteration_message>
</Iteration>""" </Iteration>
"""
self.check_raw(filename, "random_s00", raw) self.check_raw(filename, "random_s00", raw)


def test_blastxml_2226_multiple_middle(self): def test_blastxml_2226_multiple_middle(self):
Expand Down Expand Up @@ -239,7 +240,8 @@ def test_blastxml_2226_multiple_middle(self):
<Statistics_entropy>0.14</Statistics_entropy> <Statistics_entropy>0.14</Statistics_entropy>
</Statistics> </Statistics>
</Iteration_stat> </Iteration_stat>
</Iteration>""" </Iteration>
"""
self.check_raw(filename, "gi|16080617|ref|NP_391444.1|", raw) self.check_raw(filename, "gi|16080617|ref|NP_391444.1|", raw)


def test_blastxml_2226_multiple_last(self): def test_blastxml_2226_multiple_last(self):
Expand Down Expand Up @@ -498,7 +500,8 @@ def test_blastxml_2226_multiple_last(self):
<Statistics_entropy>0.14</Statistics_entropy> <Statistics_entropy>0.14</Statistics_entropy>
</Statistics> </Statistics>
</Iteration_stat> </Iteration_stat>
</Iteration>""" </Iteration>
"""
self.check_raw(filename, "gi|11464971:4-101", raw) self.check_raw(filename, "gi|11464971:4-101", raw)


def test_blastxml_2226_single(self): def test_blastxml_2226_single(self):
Expand Down Expand Up @@ -757,7 +760,8 @@ def test_blastxml_2226_single(self):
<Statistics_entropy>0.14</Statistics_entropy> <Statistics_entropy>0.14</Statistics_entropy>
</Statistics> </Statistics>
</Iteration_stat> </Iteration_stat>
</Iteration>""" </Iteration>
"""
self.check_raw(filename, "gi|11464971:4-101", raw) self.check_raw(filename, "gi|11464971:4-101", raw)




Expand Down

0 comments on commit 8f9e72b

Please sign in to comment.