Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Raise DuplicateHeaderException if a header is repeated in the FASTA file (because pyfasta does not work correctly in that case)
  • Loading branch information...
commit f3c1208e8b2f376151026ec2fec394d62fcb9332 1 parent 59068bd
@brentp authored
View
1  MANIFEST.in
@@ -1,3 +1,4 @@
include CHANGELOG.txt
+include README.rst
include tests/*.py
include tests/data/*.fasta
View
2  pyfasta/__init__.py
@@ -1,5 +1,5 @@
import sys
-from fasta import Fasta, complement
+from fasta import Fasta, complement, DuplicateHeaderException
from records import *
from split_fasta import split
import optparse
View
12 pyfasta/fasta.py
@@ -9,6 +9,10 @@
class FastaNotFound(Exception): pass
+class DuplicateHeaderException(Exception):
+ def __init__(self, header):
+ Exception.__init__(self, 'headers must be unique: %s is duplicated' % header)
+
class Fasta(dict):
def __init__(self, fasta_name, record_class=NpyFastaRecord,
flatten_inplace=False, key_fn=None):
@@ -58,7 +62,8 @@ def gen_seqs_with_headers(self, key_fn=None):
and generate starts, stops to be used by the record class"""
fh = open(self.fasta_name, 'r')
# do the flattening (remove newlines)
- idx = {}
+ # check for uniqueness of headers.
+ seen_headers = {}
header = None
seqs = None
for line in fh:
@@ -66,6 +71,9 @@ def gen_seqs_with_headers(self, key_fn=None):
if not line: continue
if line[0] == ">":
if seqs is not None:
+ if header in seen_headers:
+ raise DuplicateHeaderException(header)
+ seen_headers[header] = None
yield header, "".join(seqs)
header = line[1:].strip()
@@ -76,6 +84,8 @@ def gen_seqs_with_headers(self, key_fn=None):
seqs.append(line)
if seqs != []:
+ if header in seen_headers:
+ raise DuplicateHeaderException(header)
yield header, "".join(seqs)
fh.close()
View
2  setup.py
@@ -1,7 +1,7 @@
from setuptools import setup, find_packages
-version = '0.4.0'
+version = '0.4.1'
setup(name='pyfasta',
version=version,
View
4 tests/data/dups.fasta
@@ -0,0 +1,4 @@
+>a
+aaa
+>a
+bbb
View
12 tests/test_all.py
@@ -1,6 +1,8 @@
from pyfasta import Fasta
from pyfasta.records import NpyFastaRecord, MemoryRecord, FastaRecord
record_classes = [NpyFastaRecord, MemoryRecord, FastaRecord]
+from pyfasta import DuplicateHeaderException
+
try:
from pyfasta.records import TCRecord
record_classes.append(TCRecord)
@@ -14,7 +16,7 @@
import glob
def _cleanup():
- for f in glob.glob("tests/data/three_chrs.fasta*"):
+ for f in glob.glob("tests/data/three_chrs.fasta*") + glob.glob('tests/data/dups.fasta.*'):
if f.endswith(".orig"): continue
os.unlink(f)
shutil.copyfile('tests/data/three_chrs.fasta.orig', 'tests/data/three_chrs.fasta')
@@ -47,9 +49,17 @@ def test_classes():
yield check_reload, klass, fasta_name
+ yield check_duplicates, klass, inplace
+
_cleanup()
+def check_duplicates(klass, inplace):
+ assert_raises(DuplicateHeaderException,
+ lambda: Fasta('tests/data/dups.fasta',
+ record_class=klass, flatten_inplace=inplace))
+
+
def check_keys(f):
assert sorted(f.keys()) == ['chr1', 'chr2', 'chr3']
assert sorted(f.iterkeys()) == ['chr1', 'chr2', 'chr3']
Please sign in to comment.
Something went wrong with that request. Please try again.