Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Newer
Older
100644 649 lines (569 sloc) 26.849 kb
37c309f @peterjc More SeqIO.index_db tests (with sample index)
peterjc authored
1 # Copyright 2009-2012 by Peter Cock. All rights reserved.
8af957d @peterjc Adding the Bio.SeqIO.indexed_dict() function developed on a github branc...
peterjc authored
2 # This code is part of the Biopython distribution and governed by its
3 # license. Please see the LICENSE file that should have been included
4 # as part of this package.
5
391b511 @peterjc Rename Bio.SeqIO.index_many() to index_db() as discussed on mailing list
peterjc authored
6 """Unit tests for Bio.SeqIO.index(...) and index_db() functions."""
a2f1110 @peterjc BytesIO vs StringIO for testing on Python 3
peterjc authored
7
39781ec @peterjc Cope with missing sqlite3 (e.g. Python 2.4)
peterjc authored
8 try:
9 import sqlite3
10 except ImportError:
9feade6 @peterjc PEP8 tidying for SeqIO index work
peterjc authored
11 # Try to run what tests we can on Python 2.4 or Jython
82272b1 @peterjc autopep8 E265 - Format block comments.
peterjc authored
12 # where we don't expect this to be installed.
39781ec @peterjc Cope with missing sqlite3 (e.g. Python 2.4)
peterjc authored
13 sqlite3 = None
14
3b295cc @peterjc Hack for test_SeqIO_index.py under Python 3 (bug in Python 3?)
peterjc authored
15 import sys
8af957d @peterjc Adding the Bio.SeqIO.indexed_dict() function developed on a github branc...
peterjc authored
16 import os
17 import unittest
67a3ef3 @peterjc Use system temp for test_SeqIO_index.py index files
peterjc authored
18 import tempfile
dbaf109 @peterjc Test Bio.SeqIO indexing with GZIP and BGZF files
peterjc authored
19 import gzip
4e2ed2b @peterjc Silence SFF indexing test warning for .diy dummy index
peterjc authored
20 import warnings
7dd6eed @peterjc Can assume BytesIO available
peterjc authored
21 from io import BytesIO
a2f1110 @peterjc BytesIO vs StringIO for testing on Python 3
peterjc authored
22
b09ebbf @peterjc Import StringIO via _py3k module in tests
peterjc authored
23 from Bio._py3k import _as_bytes, _bytes_to_string, StringIO
9aa50e5 @peterjc Avoid open(filename, 'rU') on Python 3
peterjc authored
24 from Bio._py3k import _universal_read_mode
a2f1110 @peterjc BytesIO vs StringIO for testing on Python 3
peterjc authored
25
d9d1bb2 @peterjc Test brittle relative paths in Bio.SeqIO.index_db (issue #267)
peterjc authored
26 try:
82272b1 @peterjc autopep8 E265 - Format block comments.
peterjc authored
27 # Defined on Python 3
d9d1bb2 @peterjc Test brittle relative paths in Bio.SeqIO.index_db (issue #267)
peterjc authored
28 FileNotFoundError
29 except NameError:
82272b1 @peterjc autopep8 E265 - Format block comments.
peterjc authored
30 # Python 2 does not have this,
d9d1bb2 @peterjc Test brittle relative paths in Bio.SeqIO.index_db (issue #267)
peterjc authored
31 FileNotFoundError = IOError
32
8af957d @peterjc Adding the Bio.SeqIO.indexed_dict() function developed on a github branc...
peterjc authored
33 from Bio.SeqRecord import SeqRecord
34 from Bio import SeqIO
c7b409e @peterjc Bio.SeqIO.index() refactor (manual merge from branch)
peterjc authored
35 from Bio.SeqIO._index import _FormatToRandomAccess
8af957d @peterjc Adding the Bio.SeqIO.indexed_dict() function developed on a github branc...
peterjc authored
36 from Bio.Alphabet import generic_protein, generic_nucleotide, generic_dna
37
f797cb9 @peterjc Adding get_raw method to Bio.SeqIO.index() dictionary class (see Bug 300...
peterjc authored
38 from seq_tests_common import compare_record
39
4e2ed2b @peterjc Silence SFF indexing test warning for .diy dummy index
peterjc authored
40 from Bio import BiopythonParserWarning
fca34a9 @peterjc Skip BGZF tests in test_SeqIO_index.py if issue 17666
peterjc authored
41 from Bio import MissingPythonDependencyError
42 try:
43 from test_bgzf import _have_bug17666
44 do_bgzf = _have_bug17666()
45 except MissingPythonDependencyError:
46 do_bgzf = False
fd82a0a @cbrueffer Add blank lines where needed (PEP8 E302).
cbrueffer authored
47
d9d1bb2 @peterjc Test brittle relative paths in Bio.SeqIO.index_db (issue #267)
peterjc authored
48 CUR_DIR = os.getcwd()
49
e4f4209 @cbrueffer Fix PEP8 E3 issues (too few or too many blank lines).
cbrueffer authored
50
5797e38 @peterjc Fix Bio.SeqIO.index() for SFF files using key_function
peterjc authored
def add_prefix(key):
    """Return ``key`` with "id_" prepended (dummy key_function for the index tests)."""
    prefix = "id_"
    return prefix + key
54
fd82a0a @cbrueffer Add blank lines where needed (PEP8 E302).
cbrueffer authored
55
3b295cc @peterjc Hack for test_SeqIO_index.py under Python 3 (bug in Python 3?)
peterjc authored
def gzip_open(filename, format):
    """Open a gzip compressed file for parsing as the given format.

    On Python 2, or for any format listed in SeqIO._BinaryFormats, the
    handle from gzip.open(filename) is returned directly and the caller
    is responsible for closing it.

    Otherwise the whole file is read and decompressed into memory, the
    gzip handle is closed, and a StringIO handle over the decoded text
    is returned.
    """
    # At time of writing, under Python 3.2.2 seems gzip.open(filename, mode)
    # insists on giving byte strings (i.e. binary mode)
    # See http://bugs.python.org/issue13989
    if sys.version_info[0] < 3 or format in SeqIO._BinaryFormats:
        return gzip.open(filename)
    handle = gzip.open(filename)
    try:
        data = handle.read()  # bytes!
    finally:
        # Always release the file, even if decompression fails part way.
        handle.close()
    return StringIO(_bytes_to_string(data))
66
37c309f @peterjc More SeqIO.index_db tests (with sample index)
peterjc authored
67
68 if sqlite3:
da7ff08 @peterjc Directly check filenames actually recorded for index
peterjc authored
69
def raw_filenames(index_filename):
    """Open SQLite index and extract filenames (as is).

    Returns a 2-tuple, holding a list of strings, and the value
    of the meta_data.filenames_relative_to_index (or None).

    The list holds the file_data.name values ordered by file_number.
    The second element is True if the stored meta_data value equals
    "TRUE" ignoring case, False for any other stored value, and None
    when the index has no such meta_data row.
    """
    con = sqlite3.dbapi2.connect(index_filename)
    try:
        filenames = [row[0] for row in
                     con.execute("SELECT name FROM file_data "
                                 "ORDER BY file_number;").fetchall()]
        row = con.execute(
            "SELECT value FROM meta_data WHERE key=?;",
            ("filenames_relative_to_index",)).fetchone()
    finally:
        # Release the connection even if one of the queries fails,
        # e.g. when the file is not a valid index.
        con.close()

    if row is None:
        # No such key recorded in this index.
        filenames_relative_to_index = None
    else:
        filenames_relative_to_index = (row[0].upper() == "TRUE")
    return filenames, filenames_relative_to_index
92
37c309f @peterjc More SeqIO.index_db tests (with sample index)
peterjc authored
class OldIndexTest(unittest.TestCase):
    """Testing a pre-built index (make sure cross platform etc).

    >>> from Bio import SeqIO
    >>> d = SeqIO.index_db("triple_sff.idx", ["E3MFGYR02_no_manifest.sff", "greek.sff", "paired.sff"], "sff")
    >>> len(d)
    54
    """

    def setUp(self):
        os.chdir(CUR_DIR)
        # Indexes opened via _load_index, closed again in tearDown.
        self._open_indexes = []

    def tearDown(self):
        try:
            # Explicitly close every index a test opened so that no
            # SQLite connection or sequence file handle is leaked.
            while self._open_indexes:
                self._open_indexes.pop().close()
        finally:
            os.chdir(CUR_DIR)

    def _load_index(self, *args, **kwargs):
        """Call SeqIO.index_db and register the result for closing in tearDown."""
        d = SeqIO.index_db(*args, **kwargs)
        self._open_indexes.append(d)
        return d

    def test_old(self):
        """Load existing index with no options (from parent directory)."""
        d = self._load_index("Roche/triple_sff.idx")
        self.assertEqual(54, len(d))
        self.assertRaises(FileNotFoundError, d.get_raw, "alpha")

    def test_old_rel(self):
        """Load existing index (with relative paths) with no options (from parent directory)."""
        d = self._load_index("Roche/triple_sff_rel_paths.idx")
        self.assertEqual(54, len(d))
        self.assertEqual(395, len(d["alpha"]))

    def test_old_contents(self):
        """Check actual filenames in existing indexes."""
        filenames, flag = raw_filenames("Roche/triple_sff.idx")
        self.assertEqual(flag, None)
        self.assertEqual(filenames, ["E3MFGYR02_no_manifest.sff", "greek.sff", "paired.sff"])

        filenames, flag = raw_filenames("Roche/triple_sff_rel_paths.idx")
        self.assertEqual(flag, True)
        self.assertEqual(filenames, ["E3MFGYR02_no_manifest.sff", "greek.sff", "paired.sff"])

    def test_old_same_dir(self):
        """Load existing index with no options (from same directory)."""
        os.chdir("Roche")
        d = self._load_index("triple_sff.idx")
        self.assertEqual(54, len(d))
        self.assertEqual(395, len(d["alpha"]))

    def test_old_same_dir_rel(self):
        """Load existing index (with relative paths) with no options (from same directory)."""
        os.chdir("Roche")
        d = self._load_index("triple_sff_rel_paths.idx")
        self.assertEqual(54, len(d))
        self.assertEqual(395, len(d["alpha"]))

    def test_old_format(self):
        """Load existing index with correct format."""
        d = self._load_index("Roche/triple_sff.idx", format="sff")
        self.assertEqual(54, len(d))

    def test_old_format_wrong(self):
        """Load existing index with wrong format."""
        self.assertRaises(ValueError, SeqIO.index_db,
                          "Roche/triple_sff.idx", format="fasta")

    def test_old_files(self):
        """Load existing index with correct files (from parent directory)."""
        d = self._load_index("Roche/triple_sff.idx",
                             ["E3MFGYR02_no_manifest.sff", "greek.sff", "paired.sff"])
        self.assertEqual(54, len(d))
        self.assertRaises(FileNotFoundError, d.get_raw, "alpha")

    def test_old_files_same_dir(self):
        """Load existing index with correct files (from same directory)."""
        os.chdir("Roche")
        d = self._load_index("triple_sff.idx",
                             ["E3MFGYR02_no_manifest.sff", "greek.sff", "paired.sff"])
        self.assertEqual(54, len(d))
        self.assertEqual(395, len(d["alpha"]))

    def test_old_files_wrong(self):
        """Load existing index with wrong files."""
        self.assertRaises(ValueError, SeqIO.index_db,
                          "Roche/triple_sff.idx", ["a.sff", "b.sff", "c.sff"])

    def test_old_files_wrong2(self):
        """Load existing index with wrong number of files."""
        self.assertRaises(ValueError, SeqIO.index_db,
                          "Roche/triple_sff.idx",
                          ["E3MFGYR02_no_manifest.sff", "greek.sff"])
178
7d7bfe4 @peterjc Test some indexing relative path setups
peterjc authored
class NewIndexTest(unittest.TestCase):
    """Check paths etc in newly built index."""

    def setUp(self):
        os.chdir(CUR_DIR)

    def tearDown(self):
        os.chdir(CUR_DIR)
        # Remove any index a failed test left behind.
        for stale in ("temp.idx", "Roche/temp.idx"):
            if os.path.isfile(stale):
                os.remove(stale)

    def check(self, index_file, sff_files, expt_sff_files):
        """Build, inspect and reload an SFF index, then delete it.

        index_file is where the index is written, sff_files are the
        filenames handed to SeqIO.index_db, and expt_sff_files are the
        filenames expected to be stored inside the SQLite index.
        """
        if os.path.isfile(index_file):
            os.remove(index_file)

        # Build index...
        index = SeqIO.index_db(index_file, sff_files, "sff")
        self.assertEqual(395, len(index["alpha"]))
        index._con.close()  # hack for PyPy
        index.close()
        wanted = [os.path.abspath(name) for name in sff_files]
        recorded = [os.path.abspath(name) for name in index._filenames]
        self.assertEqual(wanted, recorded)

        # Now directly check the filenames inside the SQLite index:
        stored_names, relative_flag = raw_filenames(index_file)
        self.assertEqual(relative_flag, True)
        self.assertEqual(stored_names, expt_sff_files)

        # Load index...
        index = SeqIO.index_db(index_file, sff_files)
        self.assertEqual(395, len(index["alpha"]))
        index._con.close()  # hack for PyPy
        index.close()
        self.assertEqual([os.path.abspath(name) for name in sff_files],
                         index._filenames)

        os.remove(index_file)

    def test_child_folder_rel(self):
        """Check relative links to child folder."""
        # Note we expect relative paths recorded with Unix slashes!
        expt_sff_files = ["Roche/E3MFGYR02_no_manifest.sff",
                          "Roche/greek.sff",
                          "Roche/paired.sff"]

        self.check("temp.idx", expt_sff_files, expt_sff_files)

        # First with the index given as abs, then as a relative path;
        # either way the inputs mix relative and absolute filenames.
        for index_name in (os.path.abspath("temp.idx"), "temp.idx"):
            mixed = ["Roche/E3MFGYR02_no_manifest.sff",
                     os.path.abspath("Roche/greek.sff"),
                     "Roche/paired.sff"]
            self.check(index_name, mixed, expt_sff_files)

    def test_same_folder(self):
        """Check relative links in same folder."""
        os.chdir("Roche")
        expt_sff_files = ["E3MFGYR02_no_manifest.sff", "greek.sff", "paired.sff"]

        # Here everything is relative,
        self.check("temp.idx", expt_sff_files, expt_sff_files)

        # Same three inputs (plain, absolute, via parent folder) against
        # three different spellings of the index filename.
        index_names = (os.path.abspath("temp.idx"),
                       "temp.idx",
                       "../Roche/temp.idx")
        for index_name in index_names:
            mixed = ["E3MFGYR02_no_manifest.sff",
                     os.path.abspath("greek.sff"),
                     "../Roche/paired.sff"]
            self.check(index_name, mixed, expt_sff_files)

    def test_some_abs(self):
        """Check absolute filenames in index."""
        # Only the unique name is wanted; the index itself is built by check().
        fd, index_name = tempfile.mkstemp(prefix="index_test_", suffix=".idx")
        os.close(fd)
        os.remove(index_name)

        expt_sff_files = [os.path.abspath("Roche/E3MFGYR02_no_manifest.sff"),
                          os.path.abspath("Roche/greek.sff"),
                          os.path.abspath(os.path.join("Roche", "paired.sff"))]
        # All absolute paths...
        self.check(index_name, expt_sff_files, expt_sff_files)
        # Now try with mix of abs and relative paths...
        mixed = [os.path.abspath("Roche/E3MFGYR02_no_manifest.sff"),
                 os.path.join("Roche", "greek.sff"),
                 os.path.abspath("Roche/paired.sff")]
        self.check(index_name, mixed, expt_sff_files)
276
7d7bfe4 @peterjc Test some indexing relative path setups
peterjc authored
277
9252f1c @peterjc No code changes. Removing white space before ':' character to match PEP8...
peterjc authored
278 class IndexDictTests(unittest.TestCase):
8af957d @peterjc Adding the Bio.SeqIO.indexed_dict() function developed on a github branc...
peterjc authored
279 """Cunning unit test where methods are added at run time."""
67a3ef3 @peterjc Use system temp for test_SeqIO_index.py index files
peterjc authored
def setUp(self):
    """Start from the original working directory with a fresh temp index filename."""
    os.chdir(CUR_DIR)
    fd, tmp_name = tempfile.mkstemp("_idx.tmp")
    self.index_tmp = tmp_name
    # Only the filename is needed, so release the OS level handle.
    os.close(fd)
67a3ef3 @peterjc Use system temp for test_SeqIO_index.py index files
peterjc authored
284
def tearDown(self):
    """Restore the working directory and delete the temp index file if present."""
    os.chdir(CUR_DIR)
    leftover = self.index_tmp
    if os.path.isfile(leftover):
        os.remove(leftover)
289
dbaf109 @peterjc Test Bio.SeqIO indexing with GZIP and BGZF files
peterjc authored
def simple_check(self, filename, format, alphabet, comp):
    """Check indexing (without a key function)."""
    # Parse the file directly to get the record identifiers to expect.
    if not comp:
        id_list = [rec.id for rec in SeqIO.parse(filename, format, alphabet)]
    else:
        handle = gzip_open(filename, format)
        id_list = [rec.id for rec in SeqIO.parse(handle, format, alphabet)]
        handle.close()

    index = SeqIO.index(filename, format, alphabet)
    self.check_dict_methods(index, id_list, id_list)
    index.close()
    del index

    if not sqlite3:
        return

    # In memory,
    # note here give filenames as list of strings
    index = SeqIO.index_db(":memory:", [filename], format, alphabet)
    self.check_dict_methods(index, id_list, id_list)
    index.close()
    del index

    # check error conditions
    self.assertRaises(ValueError, SeqIO.index_db,
                      ":memory:", format="dummy")
    self.assertRaises(ValueError, SeqIO.index_db,
                      ":memory:", filenames=["dummy"])

    # Saving to file...
    index_tmp = self.index_tmp
    if os.path.isfile(index_tmp):
        os.remove(index_tmp)

    # To disk,
    # note here we give the filename as a single string
    # to confirm that works too (convenience feature).
    index = SeqIO.index_db(index_tmp, filename, format, alphabet)
    self.check_dict_methods(index, id_list, id_list)
    index.close()
    index._con.close()  # hack for PyPy
    del index

    # Now reload it...
    index = SeqIO.index_db(index_tmp, [filename], format, alphabet)
    self.check_dict_methods(index, id_list, id_list)
    index.close()
    index._con.close()  # hack for PyPy
    del index

    # Now reload without passing filenames and format
    # and switch directory to check paths still work
    index_tmp = os.path.abspath(index_tmp)
    os.chdir(os.path.dirname(filename))
    index = SeqIO.index_db(index_tmp, alphabet=alphabet)
    self.check_dict_methods(index, id_list, id_list)
    index.close()
    index._con.close()  # hack for PyPy
    del index

    os.remove(index_tmp)
b1c8597 @cbrueffer Trim EOL whitespace (PEP8 W291, W293), batch 3.
cbrueffer authored
355
dbaf109 @peterjc Test Bio.SeqIO indexing with GZIP and BGZF files
peterjc authored
def key_check(self, filename, format, alphabet, comp):
    """Check indexing with a key function."""
    # Parse the file directly to get the record identifiers to expect.
    if not comp:
        id_list = [rec.id for rec in SeqIO.parse(filename, format, alphabet)]
    else:
        handle = gzip_open(filename, format)
        id_list = [rec.id for rec in SeqIO.parse(handle, format, alphabet)]
        handle.close()

    # The index should be keyed by the prefixed identifiers.
    key_list = list(map(add_prefix, id_list))
    index = SeqIO.index(filename, format, alphabet, add_prefix)
    self.check_dict_methods(index, key_list, id_list)
    index.close()
    del index

    if not sqlite3:
        return

    # In memory,
    index = SeqIO.index_db(":memory:", [filename], format, alphabet,
                           add_prefix)
    self.check_dict_methods(index, key_list, id_list)
    # check error conditions
    self.assertRaises(ValueError, SeqIO.index_db,
                      ":memory:", format="dummy",
                      key_function=add_prefix)
    self.assertRaises(ValueError, SeqIO.index_db,
                      ":memory:", filenames=["dummy"],
                      key_function=add_prefix)
    index.close()
    del index

    # Saving to file...
    index_tmp = filename + ".key.idx"
    if os.path.isfile(index_tmp):
        # Clear any stale index left by an earlier run.
        os.remove(index_tmp)
    index = SeqIO.index_db(index_tmp, [filename], format, alphabet,
                           add_prefix)
    self.check_dict_methods(index, key_list, id_list)
    index.close()
    index._con.close()  # hack for PyPy
    del index

    # Now reload it...
    index = SeqIO.index_db(index_tmp, [filename], format, alphabet,
                           add_prefix)
    self.check_dict_methods(index, key_list, id_list)
    index.close()
    index._con.close()  # hack for PyPy
    del index

    # Now reload without passing filenames and format
    index = SeqIO.index_db(index_tmp, alphabet=alphabet,
                           key_function=add_prefix)
    self.check_dict_methods(index, key_list, id_list)
    index.close()
    index._con.close()  # hack for PyPy
    del index
    os.remove(index_tmp)
    # Done
b1c8597 @cbrueffer Trim EOL whitespace (PEP8 W291, W293), batch 3.
cbrueffer authored
416
b35977f @peterjc Reduce code duplication in test_SeqIO_index.py
peterjc authored
def check_dict_methods(self, rec_dict, keys, ids):
    """Check the read-only dictionary interface of an index object.

    Arguments:
     - rec_dict - the dictionary-like index under test.
     - keys - the expected keys.
     - ids - the expected record identifiers, paired with keys via zip.

    Verifies the key set, len(), truth value, item access and get(),
    the behaviour for a missing key, iteration over items (and, on
    Python 3, values), and that popitem, pop, clear, __setitem__, copy
    and fromkeys all raise NotImplementedError.
    """
    self.assertEqual(set(keys), set(rec_dict))
    # This is redundant, I just want to make sure len works:
    self.assertEqual(len(keys), len(rec_dict))
    # Make sure boolean evaluation works
    self.assertEqual(bool(keys), bool(rec_dict))
    for key, rec_id in zip(keys, ids):
        self.assertTrue(key in rec_dict)
        self.assertEqual(rec_id, rec_dict[key].id)
        self.assertEqual(rec_id, rec_dict.get(key).id)

    # Check non-existent keys,
    missing = chr(0)
    # Sanity check on the test data itself, not on the code under test.
    assert missing not in keys, "Bad example in test"
    # Reported as a test failure (not an error) if no KeyError is raised.
    self.assertRaises(KeyError, rec_dict.__getitem__, missing)
    self.assertEqual(rec_dict.get(missing), None)
    self.assertEqual(rec_dict.get(missing, chr(1)), chr(1))

    python2 = hasattr(dict, "iteritems")
    if not python2:
        # Python 3
        assert not hasattr(rec_dict, "iteritems")
    for key, rec in rec_dict.items():
        self.assertTrue(key in keys)
        self.assertTrue(isinstance(rec, SeqRecord))
        self.assertTrue(rec.id in ids)
    if not python2:
        # Values carry no key, so only the records themselves are checked.
        for rec in rec_dict.values():
            self.assertTrue(isinstance(rec, SeqRecord))
            self.assertTrue(rec.id in ids)

    # Check the following fail
    self.assertRaises(NotImplementedError, rec_dict.popitem)
    self.assertRaises(NotImplementedError, rec_dict.pop, chr(0))
    self.assertRaises(NotImplementedError, rec_dict.pop, chr(0), chr(1))
    self.assertRaises(NotImplementedError, rec_dict.clear)
    self.assertRaises(NotImplementedError, rec_dict.__setitem__, "X", None)
    self.assertRaises(NotImplementedError, rec_dict.copy)
    self.assertRaises(NotImplementedError, rec_dict.fromkeys, [])
f797cb9 @peterjc Adding get_raw method to Bio.SeqIO.index() dictionary class (see Bug 300...
peterjc authored
461
dbaf109 @peterjc Test Bio.SeqIO indexing with GZIP and BGZF files
peterjc authored
def get_raw_check(self, filename, format, alphabet, comp):
    """Check get_raw() output for every record of an indexed file.

    Arguments:
     - filename - path of the sequence file to index.
     - format - SeqIO format name used for parsing and indexing.
     - alphabet - alphabet passed through to SeqIO (may be None).
     - comp - true if the file is compressed, in which case the
       reference contents are read through gzip.

    The file is indexed with a lower-casing key function. For each key
    the bytes returned by get_raw() must be non-blank, must occur in
    the (decompressed) file contents, and must parse back into a record
    which compare_record() reports as matching the one returned by
    __getitem__.
    """
    # Also checking the key_function here
    if comp:
        h = gzip.open(filename, "rb")
        try:
            raw_file = h.read()
        finally:
            h.close()
        h = gzip_open(filename, format)
        try:
            id_list = [rec.id.lower() for rec in
                       SeqIO.parse(h, format, alphabet)]
        finally:
            h.close()
    else:
        with open(filename, "rb") as h:
            raw_file = h.read()
        id_list = [rec.id.lower() for rec in
                   SeqIO.parse(filename, format, alphabet)]

    def lower_key(name):
        return name.lower()

    if format == "sff":
        # Ignore BiopythonParserWarning while the SFF file is indexed.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', BiopythonParserWarning)
            rec_dict = SeqIO.index(filename, format, alphabet,
                                   key_function=lower_key)
    else:
        rec_dict = SeqIO.index(filename, format, alphabet,
                               key_function=lower_key)

    # Close the index even when one of the assertions below fails.
    try:
        self.assertEqual(set(id_list), set(rec_dict))
        self.assertEqual(len(id_list), len(rec_dict))
        for key in id_list:
            self.assertTrue(key in rec_dict)
            self.assertEqual(key, rec_dict[key].id.lower())
            self.assertEqual(key, rec_dict.get(key).id.lower())
            raw = rec_dict.get_raw(key)
            self.assertTrue(raw.strip())
            self.assertTrue(raw in raw_file)
            rec1 = rec_dict[key]
            # Following isn't very elegant, but it lets me test the
            # __getitem__ SFF code is working.
            if format in SeqIO._BinaryFormats:
                handle = BytesIO(raw)
            else:
                handle = StringIO(_bytes_to_string(raw))
            if format in ("sff", "sff-trim"):
                # The two SFF variants differ only in the trim flag.
                proxy = rec_dict._proxy
                rec2 = SeqIO.SffIO._sff_read_seq_record(
                    handle,
                    proxy._flows_per_read,
                    proxy._flow_chars,
                    proxy._key_sequence,
                    proxy._alphabet,
                    trim=(format == "sff-trim"))
            elif format == "uniprot-xml":
                self.assertTrue(raw.startswith(_as_bytes("<entry ")))
                self.assertTrue(raw.endswith(_as_bytes("</entry>")))
                # Currently the __getitem__ method uses this
                # trick too, but we hope to fix that later
                wrapped = """<?xml version='1.0' encoding='UTF-8'?>
<uniprot xmlns="http://uniprot.org/uniprot"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://uniprot.org/uniprot
http://www.uniprot.org/support/docs/uniprot.xsd">
%s
</uniprot>
""" % _bytes_to_string(raw)
                rec2 = SeqIO.read(StringIO(wrapped), format, alphabet)
            else:
                rec2 = SeqIO.read(handle, format, alphabet)
            self.assertEqual(True, compare_record(rec1, rec2))
    finally:
        rec_dict.close()
15d55e3 @peterjc Test indexing files with duplicate identifiers
peterjc authored
538
39781ec @peterjc Cope with missing sqlite3 (e.g. Python 2.4)
peterjc authored
if sqlite3:
    def test_duplicates_index_db(self):
        """Index file with duplicate identifiers with Bio.SeqIO.index_db()"""
        # ":memory:" is SQLite's name for an in-memory database.
        sources = ["Fasta/dups.fasta"]
        self.assertRaises(ValueError, SeqIO.index_db, ":memory:",
                          sources, "fasta")
b61db40 @peterjc Another small unit test for Bio.SeqIO.index_many
peterjc authored
544
15d55e3 @peterjc Test indexing files with duplicate identifiers
peterjc authored
def test_duplicates_index(self):
    """Index file with duplicate identifiers with Bio.SeqIO.index()"""
    duplicated = "Fasta/dups.fasta"
    # Indexing must refuse a file in which an identifier is repeated.
    self.assertRaises(ValueError, SeqIO.index, duplicated, "fasta")
15d55e3 @peterjc Test indexing files with duplicate identifiers
peterjc authored
548
def test_duplicates_to_dict(self):
    """Index file with duplicate identifiers with Bio.SeqIO.to_dict()"""
    # The with block closes the handle even if the assertion fails.
    with open("Fasta/dups.fasta", _universal_read_mode) as handle:
        iterator = SeqIO.parse(handle, "fasta")
        self.assertRaises(ValueError, SeqIO.to_dict, iterator)
555
8af957d @peterjc Adding the Bio.SeqIO.indexed_dict() function developed on a github branc...
peterjc authored
# Table of (filename, SeqIO format, alphabet) cases; three test methods
# are generated on IndexDictTests for each entry (and for its BGZF
# compressed twin when one is present and BGZF testing is enabled).
tests = [
    ("Ace/contig1.ace", "ace", generic_dna),
    ("Ace/consed_sample.ace", "ace", None),
    ("Ace/seq.cap.ace", "ace", generic_dna),
    ("Quality/wrapping_original_sanger.fastq", "fastq", None),
    ("Quality/example.fastq", "fastq", None),  # Unix newlines
    ("Quality/example.fastq", "fastq-sanger", generic_dna),
    ("Quality/example_dos.fastq", "fastq", None),  # DOS/Windows newlines
    ("Quality/tricky.fastq", "fastq", generic_nucleotide),
    ("Quality/sanger_faked.fastq", "fastq-sanger", generic_dna),
    ("Quality/solexa_faked.fastq", "fastq-solexa", generic_dna),
    ("Quality/illumina_faked.fastq", "fastq-illumina", generic_dna),
    ("Quality/zero_length.fastq", "fastq", generic_dna),
    ("EMBL/epo_prt_selection.embl", "embl", None),
    ("EMBL/U87107.embl", "embl", None),
    ("EMBL/TRBG361.embl", "embl", None),
    ("EMBL/A04195.imgt", "embl", None),  # Not a proper EMBL file, an IMGT file
    ("EMBL/A04195.imgt", "imgt", None),
    ("EMBL/patents.embl", "embl", generic_protein),
    ("EMBL/AAA03323.embl", "embl", None),
    ("GenBank/NC_000932.faa", "fasta", generic_protein),
    ("GenBank/NC_005816.faa", "fasta", generic_protein),
    ("GenBank/NC_005816.tsv", "tab", generic_protein),
    ("GenBank/NC_005816.ffn", "fasta", generic_dna),
    ("GenBank/NC_005816.fna", "fasta", generic_dna),
    ("GenBank/NC_005816.gb", "gb", None),
    ("GenBank/cor6_6.gb", "genbank", None),
    ("IntelliGenetics/vpu_nucaligned.txt", "ig", generic_nucleotide),
    ("IntelliGenetics/TAT_mase_nuc.txt", "ig", None),
    ("IntelliGenetics/VIF_mase-pro.txt", "ig", generic_protein),
    ("Phd/phd1", "phd", generic_dna),
    ("Phd/phd2", "phd", None),
    ("Phd/phd_solexa", "phd", generic_dna),
    ("Phd/phd_454", "phd", generic_dna),
    ("NBRF/B_nuc.pir", "pir", generic_nucleotide),
    ("NBRF/Cw_prot.pir", "pir", generic_protein),
    ("NBRF/clustalw.pir", "pir", None),
    ("SwissProt/sp001", "swiss", None),
    ("SwissProt/sp010", "swiss", None),
    ("SwissProt/sp016", "swiss", None),
    ("SwissProt/multi_ex.txt", "swiss", None),
    ("SwissProt/multi_ex.xml", "uniprot-xml", None),
    ("SwissProt/multi_ex.fasta", "fasta", None),
    ("Roche/E3MFGYR02_random_10_reads.sff", "sff", generic_dna),
    ("Roche/E3MFGYR02_random_10_reads.sff", "sff-trim", generic_dna),
    ("Roche/E3MFGYR02_index_at_start.sff", "sff", generic_dna),
    ("Roche/E3MFGYR02_index_in_middle.sff", "sff", generic_dna),
    ("Roche/E3MFGYR02_alt_index_at_start.sff", "sff", generic_dna),
    ("Roche/E3MFGYR02_alt_index_in_middle.sff", "sff", generic_dna),
    ("Roche/E3MFGYR02_alt_index_at_end.sff", "sff", generic_dna),
    ("Roche/E3MFGYR02_no_manifest.sff", "sff", generic_dna),
    ("Roche/greek.sff", "sff", generic_nucleotide),
    ("Roche/greek.sff", "sff-trim", generic_nucleotide),
    ("Roche/paired.sff", "sff", None),
    ("Roche/paired.sff", "sff-trim", None),
]


def _build_index_test(check_name, description, fn, fmt, alpha, c):
    """Return a test method calling self.<check_name>(fn, fmt, alpha, c).

    The arguments are bound here, in a separate function, so that each
    generated test keeps its own values rather than those of the last
    loop iteration.
    """
    def test(self):
        getattr(self, check_name)(fn, fmt, alpha, c)
    test.__doc__ = "Index %s file %s %s" % (fmt, fn, description)
    return test


# (test name suffix, IndexDictTests method to call, docstring ending)
_index_checks = [
    ("simple", "simple_check", "defaults"),
    ("keyf", "key_check", "with key function"),
    ("get_raw", "get_raw_check", "get_raw"),
]

for filename, format, alphabet in tests:
    assert format in _FormatToRandomAccess
    tasks = [(filename, None)]
    if do_bgzf and os.path.isfile(filename + ".bgz"):
        tasks.append((filename + ".bgz", "bgzf"))
    for task_filename, comp in tasks:
        safe_name = task_filename.replace("/", "_").replace(".", "_")
        for suffix, check_name, description in _index_checks:
            setattr(IndexDictTests,
                    "test_%s_%s_%s" % (format, safe_name, suffix),
                    _build_index_test(check_name, description,
                                      task_filename, format, alphabet, comp))

# The factory and its table are only needed while generating the tests.
del _build_index_test
del _index_checks
f797cb9 @peterjc Adding get_raw method to Bio.SeqIO.index() dictionary class (see Bug 300...
peterjc authored
645
8af957d @peterjc Adding the Bio.SeqIO.indexed_dict() function developed on a github branc...
peterjc authored
if __name__ == "__main__":
    # Run the module's tests with the verbose text runner.
    unittest.main(testRunner=unittest.TextTestRunner(verbosity=2))
Something went wrong with that request. Please try again.