Skip to content

Commit

Permalink
Implement length for IntelliGenetics indexing
Browse files (browse the repository at this point in the history)
  • Loading branch information
peterjc committed Jun 21, 2012
1 parent b77d72e commit 7f195e2
Showing 1 changed file with 9 additions and 2 deletions.
11 changes: 9 additions & 2 deletions Bio/SeqIO/_index.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -89,6 +89,12 @@ def __init__(self, filename, format, alphabet, key_function):
#Note - we don't store the length because I want to minimise the
#memory requirements. With the SQLite backend the length is kept
#and is used to speed up the get_raw method (by about 3 times).
#The length should be provided by all the current backends except
#SFF where there is an existing Roche index we can reuse (very fast
#but lacks the record lengths)
#assert length or format in ["sff", "sff-trim"], \
# "%s at offset %i given length %r (%s format %s)" \
# % (key, offset, length, filename, format)
if key in offsets:
self._proxy._handle.close()
raise ValueError("Duplicate key '%s'" % key)
Expand Down Expand Up @@ -916,21 +922,22 @@ def __iter__(self):
while True:
offset = handle.tell()
line = handle.readline()
length = len(line)
if marker_re.match(line):
#Now look for the first line which doesn't start ";"
while True:
line = handle.readline()
if line[0:1] != semi_char and line.strip():
key = line.split()[0]
yield _bytes_to_string(key), offset, 0
yield _bytes_to_string(key), offset, length
break
if not line:
raise ValueError("Premature end of file?")
length += len(line)
elif not line:
#End of file
break


def get_raw(self, offset):
handle = self._handle
handle.seek(offset)
Expand Down

0 comments on commit 7f195e2

Please sign in to comment.