From c6717f1eb949bad0cad282614ac63a43d3f9c590 Mon Sep 17 00:00:00 2001 From: bow Date: Sun, 9 Dec 2012 06:38:15 +0100 Subject: [PATCH] Add hmmer2-text indexer --- Bio/SearchIO/HmmerIO/__init__.py | 1 + Bio/SearchIO/HmmerIO/hmmer2_text.py | 24 ++++++++++++++++++++++++ Bio/SearchIO/__init__.py | 5 ++++- 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/Bio/SearchIO/HmmerIO/__init__.py b/Bio/SearchIO/HmmerIO/__init__.py index 54da8307aff..2e4b33ab8fa 100644 --- a/Bio/SearchIO/HmmerIO/__init__.py +++ b/Bio/SearchIO/HmmerIO/__init__.py @@ -27,6 +27,7 @@ - Plain text, v3.0 - 'hmmer3-text' - parsing, indexing - Table, v3.0 - 'hmmer3-tab' - parsing, indexing, writing - Domain table, v3.0 - 'hmmer3-domtab'* - parsing, indexing, writing + - Plain text, v2.x - 'hmmer2-text' - parsing, indexing * For the domain table output, due to the way HMMER outputs the sequence coordinates, you have to specify what HMMER flavor produced the output as the diff --git a/Bio/SearchIO/HmmerIO/hmmer2_text.py b/Bio/SearchIO/HmmerIO/hmmer2_text.py index 0bb1faa267e..ead57466e02 100644 --- a/Bio/SearchIO/HmmerIO/hmmer2_text.py +++ b/Bio/SearchIO/HmmerIO/hmmer2_text.py @@ -2,14 +2,22 @@ # This code is part of the Biopython distribution and governed by its # license. Please see the LICENSE file that should have been included # as part of this package. 
+ """Bio.SearchIO parser for HMMER 2 text output.""" import re + +from Bio._py3k import _as_bytes from Bio.Alphabet import generic_protein from Bio.SearchIO._model import QueryResult, Hit, HSP, HSPFragment +from _base import _BaseHmmerTextIndexer + +__all__ = ['Hmmer2TextParser', 'Hmmer2TextIndexer'] + _HSP_ALIGN_LINE = re.compile(r'(\S+):\s+domain (\d+) of (\d+)') + class _HitPlaceholder(object): def createHit(self, hsp_list): hit = Hit(hsp_list) @@ -273,3 +281,19 @@ def parse_hsp_alignments(self): else: frag.hit = otherseq frag.query = hmmseq + + +class Hmmer2TextIndexer(_BaseHmmerTextIndexer): + + """Indexer for hmmer2-text format.""" + + _parser = Hmmer2TextParser + qresult_start = _as_bytes('Query ') + qresult_end = _as_bytes('//') + regex_id = re.compile(_as_bytes(r'Query (?:sequence|HMM):\s*(.*)')) + + +# if not used as a module, run the doctest +if __name__ == "__main__": + from Bio.SearchIO._utils import run_doctest + run_doctest() diff --git a/Bio/SearchIO/__init__.py b/Bio/SearchIO/__init__.py index 0fdf0174f52..4b9d184224c 100644 --- a/Bio/SearchIO/__init__.py +++ b/Bio/SearchIO/__init__.py @@ -178,8 +178,10 @@ - exonerate-vulgar - Exonerate vulgar line. - exonerate-text - Exonerate cigar line. - fasta-m10 - Bill Pearson's FASTA -m 10 output. - - hmmer3-text - HMMER regular text output format. Supported HMMER + - hmmer3-text - HMMER3 regular text output format. Supported HMMER3 subprograms are hmmscan, hmmsearch, and phmmer. + - hmmer2-text - HMMER2 regular text output format. Supported HMMER2 + subprograms are hmmpfam and hmmsearch. Support for parsing: @@ -242,6 +244,7 @@ 'exonerate-text': ('ExonerateIO', 'ExonerateTextIndexer'), 'exonerate-vulgar': ('ExonerateIO', 'ExonerateVulgarIndexer'), 'fasta-m10': ('FastaIO', 'FastaM10Indexer'), + 'hmmer2-text': ('HmmerIO', 'Hmmer2TextIndexer'), 'hmmer3-text': ('HmmerIO', 'Hmmer3TextIndexer'), 'hmmer3-tab': ('HmmerIO', 'Hmmer3TabIndexer'), 'hmmscan3-domtab': ('HmmerIO', 'Hmmer3DomtabHmmhitIndexer'),