Skip to content

Commit

Permalink
Record all the raw ABI file tags in a dictionary
Browse files Browse the repository at this point in the history
With input from Mike Cariaso (@cariaso), David Bulger
(@DavidBulger), and Wibowo Arindrarto (@bow).

This is intended to allow advanced analysis of the
instrument-specific tags from the ABI sequencers, see:

http://www.appliedbiosystems.com/support/software_community/ABIF_File_Format.pdf
  • Loading branch information
peterjc committed Jul 10, 2014
1 parent 8230ee1 commit b20f364
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 1 deletion.
9 changes: 8 additions & 1 deletion Bio/SeqIO/AbiIO.py
@@ -1,5 +1,5 @@
# Copyright 2011 by Wibowo Arindrarto (w.arindrarto@gmail.com)
# Revisions copyright 2011 by Peter Cock.
# Revisions copyright 2011, 2014 by Peter Cock.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
Expand Down Expand Up @@ -115,6 +115,7 @@ def AbiIterator(handle, alphabet=None, trim=False):
header = struct.unpack(_HEADFMT,
handle.read(struct.calcsize(_HEADFMT)))

raw = dict()
for tag_name, tag_number, tag_data in _abi_parse_header(header, handle):
# stop iteration if all desired tags have been extracted
# 4 tags from _EXTRACT + 2 time tags from _SPCTAGS - 3,
Expand All @@ -123,6 +124,9 @@ def AbiIterator(handle, alphabet=None, trim=False):

key = tag_name + str(tag_number)

# TODO - Why not store the raw data in bytes, not as strings?
raw[key] = tag_data

# PBAS2 is base-called sequence
if key == 'PBAS2':
seq = tag_data
Expand All @@ -149,6 +153,9 @@ def AbiIterator(handle, alphabet=None, trim=False):
annot['run_start'] = '%s %s' % (times['RUND1'], times['RUNT1'])
annot['run_finish'] = '%s %s' % (times['RUND2'], times['RUNT2'])

# raw data (for the benefit of advanced end users)
annot['abif_raw'] = raw

# use the file name as SeqRecord.name if available
try:
file_name = basename(handle.name).replace('.ab1', '')
Expand Down
8 changes: 8 additions & 0 deletions NEWS
Expand Up @@ -10,6 +10,14 @@ The latest news is at the top of this file.

===================================================================

(In progress, not yet released) Biopython 1.65

The Bio.SeqIO parser for the ABI capillary file format now exposes
all the raw data as a dictionary, stored in the SeqRecord's annotations
under the key 'abif_raw'. This allows further in-depth analysis by
advanced users.

===================================================================

29 May 2014: Biopython 1.64 released.

This release of Biopython supports Python 2.6 and 2.7, 3.3 and also the
Expand Down
30 changes: 30 additions & 0 deletions Tests/test_SeqIO_AbiIO.py
Expand Up @@ -20,6 +20,8 @@
'machine_model': '3730',
'run_start': '2009-12-12 09:56:53',
'run_finish': '2009-12-12 11:44:49',
'abif_raw_keys': set(['RUND2', 'RUND1', 'DySN1', 'SMPL1', 'GTyp1',
'PCON2', 'RUNT2', 'PBAS2', 'RUNT1', 'MODL1', 'TUBE1']),
},
'data_3730': {
'path': ['Abi', '3730.ab1'],
Expand All @@ -34,6 +36,8 @@
'machine_model': '3730',
'run_start': '2009-12-12 09:56:53',
'run_finish': '2009-12-12 11:44:49',
'abif_raw_keys': set(['RUND2', 'RUND1', 'DySN1', 'SMPL1', 'GTyp1',
'PCON2', 'RUNT2', 'PBAS2', 'RUNT1', 'MODL1', 'TUBE1']),
},
'data_3100': {
'path': ['Abi', '3100.ab1'],
Expand All @@ -48,6 +52,8 @@
'machine_model': '3100',
'run_start': '2010-01-27 09:52:45',
'run_finish': '2010-01-27 10:41:07',
'abif_raw_keys': set(['RUND2', 'RUND1', 'DySN1', 'SMPL1', 'GTyp1',
'PCON2', 'RUNT2', 'PBAS2', 'RUNT1', 'MODL1', 'TUBE1']),
},
'data_310': {
'path': ['Abi', '310.ab1'],
Expand All @@ -62,6 +68,8 @@
'machine_model': '310 ',
'run_start': '2009-02-19 01:19:30',
'run_finish': '2009-02-19 04:04:15',
'abif_raw_keys': set(['RUND2', 'RUND1', 'SMPL1',
'PCON2', 'RUNT2', 'PBAS2', 'RUNT1', 'MODL1', 'TUBE1']),
},
}

Expand Down Expand Up @@ -112,6 +120,7 @@ def test_seqrecord(self):
self.assertEqual(test_data[trace]['machine_model'], record.annotations['machine_model'])
self.assertEqual(test_data[trace]['run_start'], record.annotations['run_start'])
self.assertEqual(test_data[trace]['run_finish'], record.annotations['run_finish'])
self.assertEqual(test_data[trace]['abif_raw_keys'], set(record.annotations['abif_raw']))

def test_trim(self):
"""Test if trim works."""
Expand All @@ -123,6 +132,27 @@ def test_trim(self):
else:
self.assertEqual(str(record.seq), test_data[trace]['seq'])

def test_raw(self):
    """Test access to raw ABIF tags.

    Reads the A6_1-DB3 example trace and checks three things in turn:
    the set of annotation keys on the record, the set of tag names held
    in the 'abif_raw' annotation, and the exact value stored for each
    of those tags.
    """
    record = SeqIO.read("Abi/A6_1-DB3.ab1", "abi")

    # The record must carry exactly these annotation keys.
    expected_annotations = set(["polymer", "run_finish", "sample_well",
                                "run_start", "machine_model", "dye",
                                "abif_raw"])
    self.assertEqual(set(record.annotations), expected_annotations)

    # The raw dictionary is keyed by tag name + tag number.
    raw = record.annotations["abif_raw"]
    expected_tags = set(["RUND2", "RUND1", "DySN1", "SMPL1", "GTyp1",
                         "PCON2", "RUNT2", "PBAS2", "RUNT1", "MODL1",
                         "TUBE1"])
    self.assertEqual(set(raw), expected_tags)

    # (tag, expected value) pairs; a tuple keeps the checks in a fixed
    # order so the first failure reported is deterministic.
    expected_values = (
        ('RUND2', '2014-06-04'),
        ('RUND1', '2014-06-04'),
        ('DySN1', 'Z-BigDyeV3'),
        ('SMPL1', 'A6_1-DB3'),
        ('GTyp1', 'POP7 '),
        ('PCON2', '\x02\x02\x03\x04\x06\x05\x04\x04\t\x05\n\x04\x04\t\x05\x05\x06\x13\x1e\x0b\t\x06\t\x06\x05"\x16\'\x17\x12\x11):\x18"\x13\x0e:*\x1a\x14\x1444)6:6\'\x15*"/)/>>>>>>>>%\'6,,>>>>>>>;>88$&!.;>;+;>>>>>>;\'>>>>>>;>>>>;>;>>>>>>>;>>>>;;>>>>>>>;>>>>>>>>;>>>>>>>>>>>>>>>>>>>>>>>>>>>6>>>>>>>>>>>>;>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>61>>>>>>>>>>>;;>>>>>>>>>>>>>6666;>>>>>>;;>>>>>>>11>>>>;;>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>;>>>>>>>>>;;>>>>>;;>>>>>>>>>>>>>>>>>>>;;>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>1>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>8>>>>1>>>>>>>>>>>>>>6>>>>>>>>>>>>>>>>>>>/6>>>6:/::::6::6:::::6:::::::60::::::::::::66:::::::6::6:6:::::::::-*:::.15355)::3::322::+)0::4:.:::636*:3:36:5:5::31.$-5+1.463:4::::(5423 :\x1b221:2,,%:2+*64:25%:\x1f234:2:\')466312(25413116\'%63\x1f\' \x1661\x1762\r1\x10\'-"\'6-56 5"5/(4\'*6663%\x1b3*(4 \'"6/(6/)65*45 *(5\x1f\x1f\x1f -#\x1e( %*/(\x11\r\x13\x18 \x15/\x1d / 4\x17\x1e %(\x1d 5 \x1f%!\x1f\x1f( %\x19\x1d\x0c\x0f\x12$\x1f\x1f3\x1a\x1e\x1f\x1d\x1e\x1f\x1e\x15 "\x1f\x1e\x1e\x12\x1e\x1a\x13\x1a!\'\x19\x1d\x1a\x1e* \x1f\x1d"\x1d\x1c\x1f\x1f\x1a"\x1f\x1a\x1b\x19&\x17/\x1f\x0e\x0e\n'),
        ('RUNT2', '01:57:17'),
        ('PBAS2', 'NNNNNNNNNNCNNNNNNGCTNNNNNGCTCACGTTGATTGCCATATCCTCACAGATTGCCTTCTCACCATTTGTCCCTTCTGATTGATCTATCGGATCGAGTGGTATATTTAACTTTGACATTAGCTGCTTGATATACTTGAAGCATAACATCTTGTTATTGGCAATTGACACAGTTCCACGATCAAGCGTCAAATCCGTCGTTGAATCGAATAGCTTTTTTAAATTCGGATTTTCAAAAACTGTGATAGCATATAGATTTCTGAACAGTGACTTTGCCTCAATACGTCGTAAATTCCGGAACATGTTCAACGAGATAAACGGTGACGATTGACGTACCAACAGGTAGCCGGTGATCGTGTAAATGTTCGCAAATATATCCTTCAACTCGGACGCCATTCCCGAATCCTGTTTTCCGCGAATCTCGATCGTCAGATTTCCGTCAATAATATTGCATAGCCTGATCGCCTGTGCCTTCGGAAACGTATCAATGACGTGATTGATCTCGCACACAATCTCACACTTGCCAACGCATTTTCGGCATTCTCGATGATCATCCGGGTTGATTTGATAACCATCGGGACATTTATCCGAGCAAAGGCCTGCCGTCGCCTTGATAGGCACTGTTTTGTTCGAGAGCACCGGATTCAGCTGCAGACACTGCTCACGGGTCACACAACGACGTTGAAGGAGAAGGTACAGGTGAGCATCACACTTTTCGATACACTTTCCCTTGTGATAGACATTCTTGCACGCGTGGCATGCTGTCGCATCATTCACACGCTCACAACCGCCCACGCATTGATCGTGACATCGATCGCCGTTCGCATCACATCCCGGG'),
        ('RUNT1', '00:10:18'),
        ('MODL1', '3730'),
        ('TUBE1', 'C12'),
    )
    for tag, expected in expected_values:
        self.assertEqual(raw[tag], expected)


class TestAbiWrongMode(unittest.TestCase):

Expand Down

0 comments on commit b20f364

Please sign in to comment.