Permalink
Browse files

Adding a test for the TRANSFAC parser

  • Loading branch information...
1 parent 012c852 commit d84866e4dc5d151fb01b730ff234062202dab6f7 Michiel de Hoon committed Jan 13, 2013
Showing with 142 additions and 62 deletions.
  1. +48 −62 Bio/Motif/TRANSFAC.py
  2. +28 −0 Tests/Motif/transfac.dat
  3. +66 −0 Tests/test_Motif.py
View
@@ -12,7 +12,8 @@
warnings.warn("Bio.Motif.TRANSFAC is experimental code. While it is usable, \
the code is subject to change without warning", BiopythonExperimentalWarning)
-from Bio.Motif import Motif as BaseMotif
+from Bio.Motif import NewMotif as BaseMotif
+from Bio.Alphabet import IUPAC
class Motif(BaseMotif, dict):
@@ -69,17 +70,6 @@ class Motif(BaseMotif, dict):
reference_keys = set(['RX', 'RA', 'RT', 'RL'])
# These keys occur for references
- def __init__(self):
- BaseMotif.__init__(self)
- self.references = []
-
- def __getitem__(self,index):
- # This can be removed if we remove the __getitem__ method from BaseMotif
- return dict.__getitem__(self, index)
-
- def __str__(self):
- return format(self, "transfac")
-
class Record(list):
"""A Bio.Motif.TRANSFAC.Record stores the information in a TRANSFAC
@@ -121,60 +111,56 @@ def __str__(self):
def read(handle):
"""record = read(handle)"""
- motif = None
- status = None
+ annotations = {}
+ references = []
+ counts = None
record = Record()
for line in handle:
line = line.strip()
- if line=='//':
- if motif is not None:
- record.append(motif)
- motif = None
- status = None
- elif line=='XX':
- pass
- else:
- key, value = line[:2], line[4:]
- if key=='VV':
- record.version = value
- continue
- if motif is None:
- motif = Motif()
- if status=="freq":
+ key, value = line[:2], line[4:]
+ if key=='VV':
+ record.version = value
+ elif key=='P0':
+ counts = {}
+ assert value.split()[:4]==['A','C','G','T']
+ length = 0
+ for c in "ACGT":
+ counts[c] = []
+ for line in handle:
+ key, value = line[:2], line[4:]
try:
i = int(key)
except ValueError:
- status = None
- else:
- motif.length+=1
- assert i==motif.length
- values = value.split()
- for c, v in zip("ACGT", values):
- motif.counts[c].append(float(v))
- continue
- if key=='P0':
- assert status!="freq"
- assert motif.counts is None
- motif.counts = {}
- assert value.split()[:4]==['A','C','G','T']
- motif.length = 0
- for c in "ACGT":
- motif.counts[c] = []
- status = "freq"
- elif key=='RN':
- index, accession = value.split(";")
- assert index[0]=='['
- assert index[-1]==']'
- index = int(index[1:-1])
- assert len(motif.references)==index-1
- reference = {key: value}
- motif.references.append(reference)
- elif key in Motif.reference_keys:
- reference[key] = value
- elif key in Motif.multiple_value_keys:
- if not key in motif:
- motif[key] = []
- motif[key].append(value)
- else:
- motif[key] = value
+ break
+ length+=1
+ assert i==length
+ values = value.split()
+ for c, v in zip("ACGT", values):
+ counts[c].append(float(v))
+ if line=='XX':
+ pass
+ elif key=='RN':
+ index, accession = value.split(";")
+ assert index[0]=='['
+ assert index[-1]==']'
+ index = int(index[1:-1])
+ assert len(references)==index-1
+ reference = {key: value}
+ references.append(reference)
+ elif key=='//':
+ if counts is not None:
+ motif = Motif(alphabet=IUPAC.unambiguous_dna, counts=counts)
+ motif.update(annotations)
+ motif.references = references
+ record.append(motif)
+ annotations = {}
+ references = []
+ elif key in Motif.reference_keys:
+ reference[key] = value
+ elif key in Motif.multiple_value_keys:
+ if not key in annotations:
+ annotations[key] = []
+ annotations[key].append(value)
+ else:
+ annotations[key] = value
return record
@@ -0,0 +1,28 @@
+ID motif1
+P0 A C G T
+01 1 2 2 0 S
+02 2 1 2 0 R
+03 3 0 1 1 A
+04 0 5 0 0 C
+05 5 0 0 0 A
+06 0 0 4 1 G
+07 0 1 4 0 G
+08 0 0 0 5 T
+09 0 0 5 0 G
+10 0 1 2 2 K
+11 0 2 0 3 Y
+12 1 0 3 1 G
+//
+ID motif2
+P0 A C G T
+01 2 1 2 0 R
+02 1 2 2 0 S
+03 0 5 0 0 C
+04 3 0 1 1 A
+05 0 0 4 1 G
+06 5 0 0 0 A
+07 0 1 4 0 G
+08 0 0 5 0 G
+09 0 0 0 5 T
+10 0 2 0 3 Y
+//
View
@@ -2420,6 +2420,72 @@ def test_mast_parser_3(self):
handle.close()
+class TestTransfac(unittest.TestCase):
+
+ def test_transfac_parser(self):
+ """Test if Motif can parse TRANSFAC files
+ """
+ handle = open("Motif/transfac.dat")
+ motifs = Motif.parse(handle, 'TRANSFAC')
+ motif = motifs[0]
+ self.assertEqual(motif['ID'], 'motif1')
+ self.assertEqual(len(motif.counts), 4)
+ self.assertEqual(motif.counts.length, 12)
+ self.assertEqual(motif.counts['A', 0], 1)
+ self.assertEqual(motif.counts['A', 1], 2)
+ self.assertEqual(motif.counts['A', 2], 3)
+ self.assertEqual(motif.counts['A', 3], 0)
+ self.assertEqual(motif.counts['A', 4], 5)
+ self.assertEqual(motif.counts['A', 5], 0)
+ self.assertEqual(motif.counts['A', 6], 0)
+ self.assertEqual(motif.counts['A', 7], 0)
+ self.assertEqual(motif.counts['A', 8], 0)
+ self.assertEqual(motif.counts['A', 9], 0)
+ self.assertEqual(motif.counts['A',10], 0)
+ self.assertEqual(motif.counts['A',11], 1)
+ self.assertEqual(motif.counts['C', 0], 2)
+ self.assertEqual(motif.counts['C', 1], 1)
+ self.assertEqual(motif.counts['C', 2], 0)
+ self.assertEqual(motif.counts['C', 3], 5)
+ self.assertEqual(motif.counts['C', 4], 0)
+ self.assertEqual(motif.counts['C', 5], 0)
+ self.assertEqual(motif.counts['C', 6], 1)
+ self.assertEqual(motif.counts['C', 7], 0)
+ self.assertEqual(motif.counts['C', 8], 0)
+ self.assertEqual(motif.counts['C', 9], 1)
+ self.assertEqual(motif.counts['C',10], 2)
+ self.assertEqual(motif.counts['C',11], 0)
+ self.assertEqual(motif.counts['G', 0], 2)
+ self.assertEqual(motif.counts['G', 1], 2)
+ self.assertEqual(motif.counts['G', 2], 1)
+ self.assertEqual(motif.counts['G', 3], 0)
+ self.assertEqual(motif.counts['G', 4], 0)
+ self.assertEqual(motif.counts['G', 5], 4)
+ self.assertEqual(motif.counts['G', 6], 4)
+ self.assertEqual(motif.counts['G', 7], 0)
+ self.assertEqual(motif.counts['G', 8], 5)
+ self.assertEqual(motif.counts['G', 9], 2)
+ self.assertEqual(motif.counts['G',10], 0)
+ self.assertEqual(motif.counts['G',11], 3)
+ self.assertEqual(motif.counts['T', 0], 0)
+ self.assertEqual(motif.counts['T', 1], 0)
+ self.assertEqual(motif.counts['T', 2], 1)
+ self.assertEqual(motif.counts['T', 3], 0)
+ self.assertEqual(motif.counts['T', 4], 0)
+ self.assertEqual(motif.counts['T', 5], 1)
+ self.assertEqual(motif.counts['T', 6], 0)
+ self.assertEqual(motif.counts['T', 7], 5)
+ self.assertEqual(motif.counts['T', 8], 0)
+ self.assertEqual(motif.counts['T', 9], 2)
+ self.assertEqual(motif.counts['T',10], 3)
+ self.assertEqual(motif.counts['T',11], 1)
+ motif = motifs[1]
+ self.assertEqual(motif['ID'], 'motif2')
+ self.assertEqual(len(motif.counts), 4)
+ self.assertEqual(motif.counts.length, 10)
+ handle.close()
+
+
class MotifTestPWM(unittest.TestCase):
def setUp(self):
import warnings

0 comments on commit d84866e

Please sign in to comment.