Permalink
Browse files

Add hmmer2-text indexer

  • Loading branch information...
1 parent be6b144 commit c6717f1eb949bad0cad282614ac63a43d3f9c590 @bow bow committed Dec 9, 2012
Showing with 29 additions and 1 deletion.
  1. +1 −0 Bio/SearchIO/HmmerIO/__init__.py
  2. +24 −0 Bio/SearchIO/HmmerIO/hmmer2_text.py
  3. +4 −1 Bio/SearchIO/__init__.py
@@ -27,6 +27,7 @@
- Plain text, v3.0 - 'hmmer3-text' - parsing, indexing
- Table, v3.0 - 'hmmer3-tab' - parsing, indexing, writing
- Domain table, v3.0 - 'hmmer3-domtab'* - parsing, indexing, writing
+ - Plain text, v2.x - 'hmmer2-text' - parsing, indexing
* For the domain table output, due to the way HMMER outputs the sequence
coordinates, you have to specify what HMMER flavor produced the output as the
@@ -2,14 +2,22 @@
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
+
"""Bio.SearchIO parser for HMMER 2 text output."""
import re
+
+from Bio._py3k import _as_bytes
from Bio.Alphabet import generic_protein
from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment
+from _base import _BaseHmmerTextIndexer
+
+__all__ = ['Hmmer2TextParser', 'Hmmer2TextIndexer']
+
_HSP_ALIGN_LINE = re.compile(r'(\S+):\s+domain (\d+) of (\d+)')
+
class _HitPlaceholder(object):
def createHit(self, hsp_list):
hit = Hit(hsp_list)
@@ -273,3 +281,19 @@ def parse_hsp_alignments(self):
else:
frag.hit = otherseq
frag.query = hmmseq
+
+
+class Hmmer2TextIndexer(_BaseHmmerTextIndexer):
+
+ """Indexer for hmmer2-text format."""
+
+ _parser = Hmmer2TextParser
+ qresult_start = _as_bytes('Query ')
+ qresult_end = _as_bytes('//')
+ regex_id = re.compile(_as_bytes(r'Query (?:sequence|HMM):\s*(.*)'))
+
+
+# if not used as a module, run the doctest
+if __name__ == "__main__":
+ from Bio.SearchIO._utils import run_doctest
+ run_doctest()
View
@@ -178,8 +178,10 @@
- exonerate-vulgar - Exonerate vulgar line.
- exonerate-text - Exonerate cigar line.
- fasta-m10 - Bill Pearson's FASTA -m 10 output.
- - hmmer3-text - HMMER regular text output format. Supported HMMER
+ - hmmer3-text - HMMER3 regular text output format. Supported HMMER3
subprograms are hmmscan, hmmsearch, and phmmer.
+ - hmmer2-text - HMMER2 regular text output format. Supported HMMER2
+ subprograms are hmmpfam and hmmsearch.
Support for parsing:
@@ -242,6 +244,7 @@
'exonerate-text': ('ExonerateIO', 'ExonerateTextIndexer'),
'exonerate-vulgar': ('ExonerateIO', 'ExonerateVulgarIndexer'),
'fasta-m10': ('FastaIO', 'FastaM10Indexer'),
+ 'hmmer2-text': ('HmmerIO', 'Hmmer2TextIndexer'),
'hmmer3-text': ('HmmerIO', 'Hmmer3TextIndexer'),
'hmmer3-tab': ('HmmerIO', 'Hmmer3TabIndexer'),
'hmmscan3-domtab': ('HmmerIO', 'Hmmer3DomtabHmmhitIndexer'),

0 comments on commit c6717f1

Please sign in to comment.