From c6717f1eb949bad0cad282614ac63a43d3f9c590 Mon Sep 17 00:00:00 2001
From: bow <bow@bow.web.id>
Date: Sun, 9 Dec 2012 06:38:15 +0100
Subject: [PATCH] Add hmmer2-text indexer

---
 Bio/SearchIO/HmmerIO/__init__.py    |  1 +
 Bio/SearchIO/HmmerIO/hmmer2_text.py | 24 ++++++++++++++++++++++++
 Bio/SearchIO/__init__.py            |  5 ++++-
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/Bio/SearchIO/HmmerIO/__init__.py b/Bio/SearchIO/HmmerIO/__init__.py
index 54da8307aff..2e4b33ab8fa 100644
--- a/Bio/SearchIO/HmmerIO/__init__.py
+++ b/Bio/SearchIO/HmmerIO/__init__.py
@@ -27,6 +27,7 @@
     - Plain text, v3.0   - 'hmmer3-text'    - parsing, indexing
     - Table, v3.0        - 'hmmer3-tab'     - parsing, indexing, writing
     - Domain table, v3.0 - 'hmmer3-domtab'* - parsing, indexing, writing
+    - Plain text, v2.x   - 'hmmer2-text'    - parsing, indexing
 
 * For the domain table output, due to the way HMMER outputs the sequence
   coordinates, you have to specify what HMMER flavor produced the output as the
diff --git a/Bio/SearchIO/HmmerIO/hmmer2_text.py b/Bio/SearchIO/HmmerIO/hmmer2_text.py
index 0bb1faa267e..ead57466e02 100644
--- a/Bio/SearchIO/HmmerIO/hmmer2_text.py
+++ b/Bio/SearchIO/HmmerIO/hmmer2_text.py
@@ -2,14 +2,22 @@
 # This code is part of the Biopython distribution and governed by its
 # license.  Please see the LICENSE file that should have been included
 # as part of this package.
+
 """Bio.SearchIO parser for HMMER 2 text output."""
 
 import re
+
+from Bio._py3k import _as_bytes
 from Bio.Alphabet import generic_protein
 from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment
 
+from _base import _BaseHmmerTextIndexer
+
+__all__ = ['Hmmer2TextParser', 'Hmmer2TextIndexer']
+
 _HSP_ALIGN_LINE = re.compile(r'(\S+):\s+domain (\d+) of (\d+)')
 
+
 class _HitPlaceholder(object):
     def createHit(self, hsp_list):
         hit = Hit(hsp_list)
@@ -273,3 +281,19 @@ def parse_hsp_alignments(self):
             else:
                 frag.hit = otherseq
                 frag.query = hmmseq
+
+
+class Hmmer2TextIndexer(_BaseHmmerTextIndexer):
+
+    """Indexer for HMMER 2 plain text output (the 'hmmer2-text' format)."""
+
+    _parser = Hmmer2TextParser  # parser class exported by this module
+    qresult_start = _as_bytes('Query ')  # presumably opens a record; confirm
+    qresult_end = _as_bytes('//')  # presumably closes a record; confirm
+    regex_id = re.compile(_as_bytes(r'Query (?:sequence|HMM):\s*(.*)'))
+
+
+# When executed directly as a script (not imported), run the doctests.
+if __name__ == "__main__":
+    from Bio.SearchIO._utils import run_doctest
+    run_doctest()
diff --git a/Bio/SearchIO/__init__.py b/Bio/SearchIO/__init__.py
index 0fdf0174f52..4b9d184224c 100644
--- a/Bio/SearchIO/__init__.py
+++ b/Bio/SearchIO/__init__.py
@@ -178,8 +178,10 @@
  - exonerate-vulgar - Exonerate vulgar line.
  - exonerate-text   - Exonerate cigar line.
  - fasta-m10        - Bill Pearson's FASTA -m 10 output.
- - hmmer3-text      - HMMER regular text output format. Supported HMMER
+ - hmmer3-text      - HMMER3 regular text output format. Supported HMMER3
                       subprograms are hmmscan, hmmsearch, and phmmer.
+ - hmmer2-text      - HMMER2 regular text output format. Supported HMMER2
+                      subprograms are hmmpfam and hmmsearch.
 
 Support for parsing:
 
@@ -242,6 +244,7 @@
         'exonerate-text': ('ExonerateIO', 'ExonerateTextIndexer'),
         'exonerate-vulgar': ('ExonerateIO', 'ExonerateVulgarIndexer'),
         'fasta-m10': ('FastaIO', 'FastaM10Indexer'),
+        'hmmer2-text': ('HmmerIO', 'Hmmer2TextIndexer'),
         'hmmer3-text': ('HmmerIO', 'Hmmer3TextIndexer'),
         'hmmer3-tab': ('HmmerIO', 'Hmmer3TabIndexer'),
         'hmmscan3-domtab': ('HmmerIO', 'Hmmer3DomtabHmmhitIndexer'),