Skip to content
Browse files

Refactor common index file opening with BGZF into (private) shared function
  • Loading branch information...
1 parent 152202d commit 7420310ccb66ab184dc116052c3759edfe6152cb @peterjc peterjc committed
Showing with 20 additions and 18 deletions.
  1. +16 −0 Bio/File.py
  2. +2 −9 Bio/SearchIO/_index.py
  3. +2 −9 Bio/SeqIO/_index.py
View
16 Bio/File.py
@@ -85,6 +85,22 @@ def as_handle(handleish, mode='r', **kwargs):
else:
yield handleish
+def _open_for_random_access(filename):
+ """Open a file in binary mode, spot if it is BGZF format etc (PRIVATE).
+
+ This functionality is used by the Bio.SeqIO and Bio.SearchIO index
+ and index_db functions.
+ """
+ handle = open(filename, "rb")
+ import bgzf
+ try:
+ return bgzf.BgzfReader(mode="rb", fileobj=handle)
+ except ValueError, e:
+ assert "BGZF" in str(e)
+ #Not a BGZF file after all, rewind to start:
+ handle.seek(0)
+ return handle
+
class UndoHandle(object):
"""A Python handle that adds functionality for saving lines.
View
11 Bio/SearchIO/_index.py
@@ -11,7 +11,7 @@
from StringIO import StringIO
from Bio._py3k import _bytes_to_string
from Bio import bgzf
-from Bio.File import _IndexedSeqFileProxy
+from Bio.File import _IndexedSeqFileProxy, _open_for_random_access
class SearchIndexer(_IndexedSeqFileProxy):
@@ -22,14 +22,7 @@ class SearchIndexer(_IndexedSeqFileProxy):
"""
def __init__(self, filename, **kwargs):
- h = open(filename, 'rb')
- try:
- self._handle = bgzf.BgzfReader(mode="rb", fileobj=h)
- except ValueError, e:
- assert "BGZF" in str(e)
- #Not a BGZF file
- h.seek(0)
- self._handle = h
+ self._handle = _open_for_random_access(filename)
self._kwargs = kwargs
def _parse(self, handle):
View
11 Bio/SeqIO/_index.py
@@ -31,19 +31,12 @@
from Bio import SeqIO
from Bio import Alphabet
from Bio import bgzf
-from Bio.File import _IndexedSeqFileProxy
+from Bio.File import _IndexedSeqFileProxy, _open_for_random_access
class SeqFileRandomAccess(_IndexedSeqFileProxy):
def __init__(self, filename, format, alphabet):
- h = open(filename, "rb")
- try:
- self._handle = bgzf.BgzfReader(mode="rb", fileobj=h)
- except ValueError, e:
- assert "BGZF" in str(e)
- #Not a BGZF file
- h.seek(0)
- self._handle = h
+ self._handle = _open_for_random_access(filename)
self._alphabet = alphabet
self._format = format
#Load the parser class/function once and avoid the dict lookup in each

0 comments on commit 7420310

Please sign in to comment.
Something went wrong with that request. Please try again.