Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

193 lines (180 sloc) 8.604 kb
# Copyright 2009-2010 by Peter Cock. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Additional unit tests for Bio.SeqIO.index(...) function."""
import sys
if sys.version_info[0] >= 3:
    # Abort the import of this test module on Python 3 by raising Biopython's
    # MissingExternalDependencyError with the reason given in the message.
    from Bio import MissingExternalDependencyError
    raise MissingExternalDependencyError(
        "Skipping since currently this is very slow on Python 3.")
import os
import unittest
from StringIO import StringIO
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO
from Bio.SeqIO._index import _FormatToIndexedDict
from Bio.Alphabet import generic_protein, generic_nucleotide, generic_dna
from seq_tests_common import compare_record
class IndexDictTests(unittest.TestCase):
"""Cunning unit test where methods are added at run time."""
def simple_check(self, filename, format, alphabet):
if format in SeqIO._BinaryFormats:
mode = "rb"
else :
mode = "r"
id_list = [rec.id for rec in \
SeqIO.parse(open(filename, mode), format, alphabet)]
rec_dict = SeqIO.index(filename, format, alphabet)
self.assertEqual(set(id_list), set(rec_dict.keys()))
#This is redundant, I just want to make sure len works:
self.assertEqual(len(id_list), len(rec_dict))
#Make sure boolean evaluation works
self.assertEqual(bool(id_list), bool(rec_dict))
for key in id_list:
self.assertTrue(key in rec_dict)
self.assertEqual(key, rec_dict[key].id)
self.assertEqual(key, rec_dict.get(key).id)
#Check non-existant keys,
try:
rec = rec_dict[chr(0)]
raise ValueError("Accessing a non-existent key should fail")
except KeyError:
pass
self.assertEqual(rec_dict.get(chr(0)), None)
self.assertEqual(rec_dict.get(chr(0), chr(1)), chr(1))
if hasattr(dict, "iteritems"):
#Python 2.x
for key, rec in rec_dict.iteritems():
self.assertTrue(key in id_list)
self.assertTrue(isinstance(rec, SeqRecord))
self.assertEqual(rec.id, key)
#Now check non-defined methods...
self.assertRaises(NotImplementedError, rec_dict.items)
self.assertRaises(NotImplementedError, rec_dict.values)
else:
#Python 3
assert not hasattr(rec_dict, "iteritems")
for key, rec in rec_dict.iteritems():
self.assertTrue(key in id_list)
self.assertTrue(isinstance(rec, SeqRecord))
self.assertEqual(rec.id, key)
for rec in rec_dict.itervalues():
self.assertTrue(key in id_list)
self.assertTrue(isinstance(rec, SeqRecord))
self.assertRaises(NotImplementedError, rec_dict.popitem)
self.assertRaises(NotImplementedError, rec_dict.pop, chr(0))
self.assertRaises(NotImplementedError, rec_dict.pop, chr(0), chr(1))
self.assertRaises(NotImplementedError, rec_dict.clear)
self.assertRaises(NotImplementedError, rec_dict.__setitem__, "X", None)
self.assertRaises(NotImplementedError, rec_dict.copy)
self.assertRaises(NotImplementedError, rec_dict.fromkeys, [])
#Done
def get_raw_check(self, filename, format, alphabet):
if format in SeqIO._BinaryFormats:
#This means SFF at the moment, which does not get
#implement the get_raw method
return
handle = open(filename, "rU")
raw_file = handle.read()
handle.close()
#Also checking the key_function here
id_list = [rec.id.lower() for rec in \
SeqIO.parse(filename, format, alphabet)]
rec_dict = SeqIO.index(filename, format, alphabet,
key_function = lambda x : x.lower())
self.assertEqual(set(id_list), set(rec_dict.keys()))
self.assertEqual(len(id_list), len(rec_dict))
for key in id_list:
self.assertTrue(key in rec_dict)
self.assertEqual(key, rec_dict[key].id.lower())
self.assertEqual(key, rec_dict.get(key).id.lower())
raw = rec_dict.get_raw(key)
self.assertTrue(raw.strip())
self.assertTrue(raw in raw_file)
if format in ["ig"]:
#These have a header structure and can't be parsed
#individually (at least, not right now).
continue
rec1 = rec_dict[key]
rec2 = SeqIO.read(StringIO(raw), format, alphabet)
self.assertEqual(True, compare_record(rec1, rec2))
def test_duplicates_index(self):
"""Index file with duplicate identifers with Bio.SeqIO.index()"""
self.assertRaises(ValueError, SeqIO.index, "Fasta/dups.fasta", "fasta")
def test_duplicates_to_dict(self):
"""Index file with duplicate identifers with Bio.SeqIO.to_dict()"""
handle = open("Fasta/dups.fasta", "rU")
iterator = SeqIO.parse(handle, "fasta")
self.assertRaises(ValueError, SeqIO.to_dict, iterator)
handle.close()
# Table driving the generated test methods.  Each entry is a
# (filename, format, alphabet) tuple; alphabet is None where none is given.
tests = [
    # ACE
    ("Ace/contig1.ace", "ace", generic_dna),
    ("Ace/consed_sample.ace", "ace", None),
    ("Ace/seq.cap.ace", "ace", generic_dna),
    # FASTQ variants
    ("Quality/wrapping_original_sanger.fastq", "fastq", None),
    ("Quality/example.fastq", "fastq", None),
    ("Quality/example.fastq", "fastq-sanger", generic_dna),
    ("Quality/tricky.fastq", "fastq", generic_nucleotide),
    ("Quality/sanger_faked.fastq", "fastq-sanger", generic_dna),
    ("Quality/solexa_faked.fastq", "fastq-solexa", generic_dna),
    ("Quality/illumina_faked.fastq", "fastq-illumina", generic_dna),
    # EMBL
    ("EMBL/U87107.embl", "embl", None),
    ("EMBL/TRBG361.embl", "embl", None),
    ("EMBL/A04195.imgt", "embl", None),  # Not a proper EMBL file, an IMGT file
    # FASTA, tab separated and GenBank
    ("GenBank/NC_000932.faa", "fasta", generic_protein),
    ("GenBank/NC_005816.faa", "fasta", generic_protein),
    ("GenBank/NC_005816.tsv", "tab", generic_protein),
    ("GenBank/NC_005816.ffn", "fasta", generic_dna),
    ("GenBank/NC_005816.fna", "fasta", generic_dna),
    ("GenBank/NC_005816.gb", "gb", None),
    ("GenBank/cor6_6.gb", "genbank", None),
    # IntelliGenetics
    ("IntelliGenetics/vpu_nucaligned.txt", "ig", generic_nucleotide),
    ("IntelliGenetics/TAT_mase_nuc.txt", "ig", None),
    ("IntelliGenetics/VIF_mase-pro.txt", "ig", generic_protein),
    # PHD
    ("Phd/phd1", "phd", generic_dna),
    ("Phd/phd2", "phd", None),
    ("Phd/phd_solexa", "phd", generic_dna),
    ("Phd/phd_454", "phd", generic_dna),
    # PIR
    ("NBRF/B_nuc.pir", "pir", generic_nucleotide),
    ("NBRF/Cw_prot.pir", "pir", generic_protein),
    ("NBRF/clustalw.pir", "pir", None),
    # SwissProt
    ("SwissProt/sp001", "swiss", None),
    ("SwissProt/sp010", "swiss", None),
    ("SwissProt/sp016", "swiss", None),
    # SFF
    ("Roche/E3MFGYR02_random_10_reads.sff", "sff", generic_dna),
    ("Roche/E3MFGYR02_random_10_reads.sff", "sff-trim", generic_dna),
    ("Roche/E3MFGYR02_index_at_start.sff", "sff", generic_dna),
    ("Roche/E3MFGYR02_index_in_middle.sff", "sff", generic_dna),
    ("Roche/E3MFGYR02_alt_index_at_start.sff", "sff", generic_dna),
    ("Roche/E3MFGYR02_alt_index_in_middle.sff", "sff", generic_dna),
    ("Roche/E3MFGYR02_alt_index_at_end.sff", "sff", generic_dna),
    ("Roche/E3MFGYR02_no_manifest.sff", "sff", generic_dna),
    ("Roche/greek.sff", "sff", generic_nucleotide),
    ("Roche/greek.sff", "sff-trim", generic_nucleotide),
    ("Roche/paired.sff", "sff", None),
    ("Roche/paired.sff", "sff-trim", None),
]
def _simple_check_test(fn, fmt, alpha):
    """Build a test method that runs simple_check for one table entry."""
    test = lambda x: x.simple_check(fn, fmt, alpha)
    test.__doc__ = "Index %s file %s" % (fmt, fn)
    return test


def _get_raw_test(fn, fmt, alpha):
    """Build a test method that runs get_raw_check for one table entry."""
    test = lambda x: x.get_raw_check(fn, fmt, alpha)
    test.__doc__ = "Index %s file %s get_raw" % (fmt, fn)
    return test


# Attach one simple_check test per table entry to IndexDictTests, plus a
# get_raw test for every format that is not in SeqIO._BinaryFormats.
for filename, format, alphabet in tests:
    assert format in _FormatToIndexedDict
    _test_name = "test_%s_%s" % (
        filename.replace("/", "_").replace(".", "_"), format)
    setattr(IndexDictTests, _test_name,
            _simple_check_test(filename, format, alphabet))
    if format not in SeqIO._BinaryFormats:
        setattr(IndexDictTests, _test_name + "_get_raw",
                _get_raw_test(filename, format, alphabet))

# The factories are only needed while the tests are being attached.
del _simple_check_test, _get_raw_test
if __name__ == "__main__":
    # Run this module's tests with verbose (level 2) text output when the
    # file is executed as a script.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
Jump to Line
Something went wrong with that request. Please try again.