Permalink
Browse files

Refactor _read_forward from HmmerIO to SearchIO._utils

  • Loading branch information...
1 parent 9d9bc22 commit ef9f50db6d27c1f51f28722c914632d4b17ba429 @bow bow committed Dec 8, 2012
Showing with 28 additions and 27 deletions.
  1. +15 −27 Bio/SearchIO/HmmerIO/hmmer3_text.py
  2. +13 −0 Bio/SearchIO/_utils.py
@@ -11,6 +11,7 @@
from Bio.Alphabet import generic_protein
from Bio.SearchIO._index import SearchIndexer
from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment
+from Bio.SearchIO._utils import read_forward
__all__ = ['Hmmer3TextParser', 'Hmmer3TextIndexer']
@@ -34,26 +35,13 @@
_HRE_ID_LINE = re.compile(r'^(\s+\S+\s+[0-9-]+ )(.+?)(\s+[0-9-]+)')
-def _read_forward(handle):
- """Reads through whitespaces, returns the first non-whitespace line."""
- while True:
- line = handle.readline()
- # if line has characters and stripping does not remove them,
- # return the line
- if line and line.strip():
- return line
- # if line ends, return None
- elif not line:
- return line
-
-
class Hmmer3TextParser(object):
"""Parser for the HMMER 3.0 text output."""
def __init__(self, handle):
self.handle = handle
- self.line = _read_forward(self.handle)
+ self.line = read_forward(self.handle)
self._meta = self._parse_preamble()
def __iter__(self):
@@ -66,7 +54,7 @@ def _read_until(self, bool_func):
if not self.line or bool_func(self.line):
return
else:
- self.line = _read_forward(self.handle)
+ self.line = read_forward(self.handle)
def _parse_preamble(self):
"""Parses HMMER preamble (lines beginning with '#')."""
@@ -106,7 +94,7 @@ def _parse_preamble(self):
else:
meta[regx.group(1)] = regx.group(2)
- self.line = _read_forward(self.handle)
+ self.line = read_forward(self.handle)
return meta
@@ -131,7 +119,7 @@ def _parse_qresult(self):
# get description and accession, if they exist
desc = '' # placeholder
while not self.line.startswith('Scores for '):
- self.line = _read_forward(self.handle)
+ self.line = read_forward(self.handle)
if self.line.startswith('Accession:'):
acc = self.line.strip().split(' ', 1)[1]
@@ -147,21 +135,21 @@ def _parse_qresult(self):
# TODO: parse and store this information?
if self.line.startswith('Internal pipeline'):
while self.line and '//' not in self.line:
- self.line = _read_forward(self.handle)
+ self.line = read_forward(self.handle)
# create qresult, set its attributes and yield
qresult = QueryResult(qid, hits=hit_list)
for attr, value in qresult_attrs.items():
setattr(qresult, attr, value)
yield qresult
- self.line = _read_forward(self.handle)
+ self.line = read_forward(self.handle)
def _parse_hit(self, qid):
"""Parses a HMMER3 hit block, beginning with the hit table."""
# get to the end of the hit table delimiter and read one more line
self._read_until(lambda line:
line.startswith(' ------- ------ -----'))
- self.line = _read_forward(self.handle)
+ self.line = read_forward(self.handle)
# assume every hit is in inclusion threshold until the inclusion
# threshold line is encountered
@@ -174,13 +162,13 @@ def _parse_hit(self, qid):
return hit_list
elif self.line.startswith(' ------ inclusion'):
is_included = False
- self.line = _read_forward(self.handle)
+ self.line = read_forward(self.handle)
# if there are no hits, then there are no hsps
# so we forward-read until 'Internal pipeline..'
elif self.line.startswith(' [No hits detected that satisfy '
'reporting'):
while True:
- self.line = _read_forward(self.handle)
+ self.line = read_forward(self.handle)
if self.line.startswith('Internal pipeline'):
assert len(hit_list) == 0
return hit_list
@@ -213,7 +201,7 @@ def _parse_hit(self, qid):
}
hit_list.append(hit_attrs)
- self.line = _read_forward(self.handle)
+ self.line = read_forward(self.handle)
def _create_hits(self, hit_attrs, qid):
"""Parses a HMMER3 hsp block, beginning with the hsp table."""
@@ -234,7 +222,7 @@ def _create_hits(self, hit_attrs, qid):
# read through the hsp table header and move one more line
self._read_until(lambda line:
line.startswith(' --- ------ ----- --------'))
- self.line = _read_forward(self.handle)
+ self.line = read_forward(self.handle)
# parse the hsp table for the current hit
hsp_list = []
@@ -302,15 +290,15 @@ def _create_hits(self, hit_attrs, qid):
hsp.acc_avg = float(parsed[15])
hsp_list.append(hsp)
- self.line = _read_forward(self.handle)
+ self.line = read_forward(self.handle)
# parse the hsp alignments
if self.line.startswith(' Alignments for each domain:'):
self._parse_aln_block(hid, hit.hsps)
def _parse_aln_block(self, hid, hsp_list):
"""Parses a HMMER3 HSP alignment block."""
- self.line = _read_forward(self.handle)
+ self.line = read_forward(self.handle)
dom_counter = 0
while True:
if self.line.startswith('>>') or \
@@ -394,7 +382,7 @@ def __iter__(self):
start_offset = handle.tell()
while True:
- line = _read_forward(handle)
+ line = read_forward(handle)
end_offset = handle.tell()
if line.startswith(self.qresult_start):
View
@@ -138,6 +138,19 @@ def setter(self, value):
return property(fget=getter, fset=setter, doc=doc)
+def read_forward(handle):
+ """Reads through whitespaces, returns the first non-whitespace line."""
+ while True:
+ line = handle.readline()
+ # if line has characters and stripping does not remove them,
+ # return the line
+ if line and line.strip():
+ return line
+ # if the handle is exhausted, return the empty line as-is (not None)
+ elif not line:
+ return line
+
+
def trim_str(string, max_len, concat_char):
"""Truncates the given string for display."""
if len(string) > max_len:

0 comments on commit ef9f50d

Please sign in to comment.