Skip to content

Commit

Permalink
Making the different parsers work uniformly
Browse files Browse the repository at this point in the history
  • Loading branch information
Michiel de Hoon authored and Michiel de Hoon committed Jan 3, 2013
1 parent 3aedeba commit 4e68a7e
Show file tree
Hide file tree
Showing 7 changed files with 433 additions and 409 deletions.
15 changes: 12 additions & 3 deletions Bio/Motif/AlignAce.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,20 @@
from Bio.Seq import Seq


class Record(object):
class Record(list):
def __init__(self):
self.motifs=[]
self.parameters = None

@property
def motifs(self):
import warnings
warnings.warn("""\
The .motifs attribute is now obsolete, and will be deprecated and removed
in a future release of Biopython. This class now inherits from list, so
instead of record.motifs[i], please use record[i].
""", PendingDeprecationWarning)
return self


def read(handle):
"""read(handle)"""
Expand Down Expand Up @@ -50,7 +59,7 @@ def read(handle):
motif.score = float(line.split()[-1])
motif.number = number
motif.mask = mask
record.motifs.append(motif)
record.append(motif)
elif len(line.split("\t"))==4:
seq = Seq(line.split("\t")[0],IUPAC.unambiguous_dna)
instances.append(seq)
Expand Down
8 changes: 4 additions & 4 deletions Bio/Motif/MAST.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,17 @@
# as part of this package.

from Bio.Alphabet import IUPAC
from Bio.Motif.MEME import MEMEMotif
from Bio.Motif import MEME


class Record(object):
"""The class for holding the results from a MAST run.
A MAST.Record holds data about matches between motifs and sequences.
The motifs held by the Record are objects of the class MEMEMotif.
The motifs held by the Record are objects of the class MEME.Motif.
Methods:
get_motif_by_name (motif_name): returns a MEMEMotif with the given
get_motif_by_name (motif_name): returns a MEME.Motif with the given
name.
"""

Expand Down Expand Up @@ -82,7 +82,7 @@ def __read_database_and_motifs(record, handle):
if not line.strip():
break
words = line.strip().split()
motif = MEMEMotif(record.alphabet)
motif = MEME.Motif(record.alphabet)
motif.name = words[0]
motif.length = int(words[1])
# words[2] contains the best possible match
Expand Down
37 changes: 23 additions & 14 deletions Bio/Motif/MEME.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from Bio.Alphabet import IUPAC
from Bio import Seq
from Bio.Motif import Motif
from Bio.Motif import Motif as BaseMotif


def read(handle):
Expand All @@ -17,12 +17,12 @@ def read(handle):
>>> f = open("meme.output.txt")
>>> from Bio.Motif import MEME
>>> record = MEME.read(f)
>>> for motif in record.motifs:
>>> for motif in record:
... for instance in motif.instances:
... print instance.motif_name, instance.sequence_name, instance.strand, instance.pvalue
"""
record = MEMERecord()
record = Record()
__read_version(record, handle)
__read_datafile(record, handle)
__read_alphabet(record, handle)
Expand All @@ -39,12 +39,12 @@ def read(handle):
length, num_occurrences, evalue = __read_motif_statistics(line)
name = __read_motif_name(handle)
instances = __read_motif_sequences(handle, name, alphabet, length, revcomp)
motif = MEMEMotif(alphabet, instances)
motif = Motif(alphabet, instances)
motif.length = length
motif.num_occurrences = num_occurrences
motif.evalue = evalue
motif.name = name
record.motifs.append(motif)
record.append(motif)
__skip_unused_lines(handle)
try:
line = handle.next()
Expand All @@ -57,7 +57,7 @@ def read(handle):
return record


class MEMEMotif(Motif):
class Motif(BaseMotif):
"""A subclass of Motif used in parsing MEME (and MAST) output.
This subclass defines functions and data specific to MEME motifs.
Expand All @@ -68,7 +68,7 @@ class MEMEMotif(Motif):
(DEPRECATION PENDING)
"""
def __init__(self, alphabet=None, instances=None):
Motif.__init__(self, alphabet, instances)
BaseMotif.__init__(self, alphabet, instances)
self.evalue = 0.0

def _numoccurrences(self, number):
Expand All @@ -91,7 +91,7 @@ def add_instance_from_values(self, name = 'default', pvalue = 1, sequence = 'ATA
import warnings
warnings.warn("This function is now obsolete, and will be deprecated and removed "
"in a future release of Biopython.", PendingDeprecationWarning)
inst = MEMEInstance(sequence,self.alphabet)
inst = Instance(sequence,self.alphabet)
inst._pvalue(pvalue)
inst._seqname(name)
inst._start(start)
Expand All @@ -115,7 +115,7 @@ def _evalue(self, evalue):
self.evalue = evalue


class MEMEInstance(Seq.Seq):
class Instance(Seq.Seq):
"""A class describing the instances of a MEME motif, and the data thereof.
"""
def __init__(self,*args,**kwds):
Expand Down Expand Up @@ -173,23 +173,32 @@ def _length(self, length):
self.length = length


class MEMERecord(object):
class Record(list):
"""A class for holding the results of a MEME run.
A MEMERecord is an object that holds the results from running
A Record is an object that holds the results from running
MEME. It implements no methods of its own.
"""
def __init__(self):
"""__init__ (self)"""
self.motifs = []
self.version = ""
self.datafile = ""
self.command = ""
self.alphabet = None
self.sequences = []

@property
def motifs(self):
import warnings
warnings.warn("""\
The .motifs attribute is now obsolete, and will be deprecated and removed
in a future release of Biopython. This class now inherits from list, so
instead of record.motifs[i], please use record[i].
""", PendingDeprecationWarning)
return self

def get_motif_by_name(self, name):
for m in self.motifs:
for m in self:
if m.name == name:
return m

Expand Down Expand Up @@ -335,7 +344,7 @@ def __read_motif_sequences(handle, motif_name, alphabet, length, revcomp):
strand = '+'
sequence = words[4]
assert len(sequence)==length
instance = MEMEInstance(sequence, alphabet)
instance = Instance(sequence, alphabet)
instance.motif_name = motif_name
instance.sequence_name = words[0]
instance.start = int(words[1])
Expand Down
21 changes: 15 additions & 6 deletions Bio/Motif/TRANSFAC.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,18 +81,27 @@ def __str__(self):
return format(self, "transfac")


class Record(object):
class Record(list):
"""A Bio.Motif.TRANSFAC.Record stores the information in a TRANSFAC
matrix table.
matrix table. The record inherits from a list containing the individual
motifs.
Attributes:
o version: The version number, corresponding to the 'VV' field
in the TRANSFAC file;
o motifs: The list of motifs.
"""
def __init__(self):
self.version = None
self.motifs = []

@property
def motifs(self):
import warnings
warnings.warn("""\
The .motifs attribute is now obsolete, and will be deprecated and removed
in a future release of Biopython. This class now inherits from list, so
instead of record.motifs[i], please use record[i].
""", PendingDeprecationWarning)
return self

def __str__(self):
blocks = []
Expand All @@ -103,7 +112,7 @@ def __str__(self):
//
""" % self.version
blocks.append(block)
for motif in self.motifs:
for motif in self:
block = str(motif)
blocks.append(block)
text = "".join(blocks)
Expand All @@ -119,7 +128,7 @@ def read(handle):
line = line.strip()
if line=='//':
if motif is not None:
record.motifs.append(motif)
record.append(motif)
motif = None
status = None
elif line=='XX':
Expand Down
91 changes: 42 additions & 49 deletions Bio/Motif/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,11 @@
It also includes functionality for parsing AlignACE and MEME programs.
"""
from Bio.Motif._Motif import Motif
from Bio.Motif.AlignAce import read as _AlignAce_read
from Bio.Motif.MEME import read as _MEME_read
from Bio.Motif import Jaspar
from Bio.Motif.Thresholds import ScoreDistribution
from Bio.Alphabet import IUPAC
from Bio.Seq import Seq


def _from_pfm(handle):
return Motif()._from_jaspar_pfm(handle)


def _from_sites(handle):
return Motif()._from_jaspar_sites(handle)


def create(instances, alphabet=None):
from Bio.Alphabet import IUPAC
from Bio.Seq import Seq
for instance in instances:
try:
a = instance.alphabet
Expand All @@ -52,13 +40,16 @@ def parse(handle,format):
"""Parses an output file of motif finding programs.
Currently supported formats:
- AlignAce
- MEME
You can also use single-motif formats, although the Bio.Motif.read()
function is simpler to use in this situation.
- jaspar-pfm
- jaspar-sites
- AlignAce: AlignAce output file format
- MEME: MEME output file motif
- TRANSFAC: TRANSFAC database file format
- pfm: JASPAR-style position-frequency matrix
- sites: JASPAR-style sites file
- jaspar-pfm: JASPAR-style position-frequency matrix [DEPRECATED]
- jaspar-sites: JASPAR-style sites file [DEPRECATED]
As files in the pfm and sites formats contain only a single motif,
it is easier to use Bio.Motif.read() instead of Bio.Motif.parse()
for those.
For example:
Expand All @@ -82,30 +73,38 @@ def parse(handle,format):
GAGGCCGGGGAT
CGACTCGTGCTTAGAAGG
"""
if format in ('pfm', 'sites'):
yield Jaspar.read(handle, format)
elif format=="AlignAce":
record = _AlignAce_read(handle)
for m in record.motifs:
yield m
if format=="AlignAce":
from Bio.Motif import AlignAce
record = AlignAce.read(handle)
return record
elif format=="MEME":
record = _MEME_read(handle)
for m in record.motifs:
yield m
from Bio.Motif import MEME
record = MEME.read(handle)
return record
elif format=="TRANSFAC":
from Bio.Motif import TRANSFAC
record = TRANSFAC.read(handle)
return record
elif format in ('pfm', 'sites'):
from Bio.Motif import Jaspar
motif = Jaspar.read(handle, format)
elif format=="jaspar-pfm":
yield _from_pfm(handle)
motif = Motif()._from_jaspar_pfm(handle)
elif format=="jaspar-sites":
yield _from_sites(handle)
motif = Motif()._from_jaspar_sites(handle)
else:
raise ValueError("Unknown format %s" % format)
# Treat the single-motif formats
motifs = [motif]
return motifs


def read(handle,format):
"""Reads a motif from a handle using a specified file-format.
This supports the same formats as Bio.Motif.parse(), but
only for files containing exactly one record. For example,
reading a pfm file:
only for files containing exactly one motif. For example,
reading a JASPAR-style pfm file:
>>> from Bio import Motif
>>> motif = Motif.read(open("Motif/SRF.pfm"), "pfm")
Expand All @@ -128,32 +127,26 @@ def read(handle,format):
...
ValueError: More than one motif found in handle
If however you want the first record from a file containing
multiple records this function would raise an exception (as
If however you want the first motif from a file containing
multiple motifs this function would raise an exception (as
shown in the example above). Instead use:
>>> from Bio import Motif
>>> motif = Motif.parse(open("Motif/alignace.out"),"AlignAce").next()
>>> motifs = Motif.parse(open("Motif/alignace.out"),"AlignAce")
>>> motif = motifs[0]
>>> motif.consensus
Seq('TCTACGATTGAG', IUPACUnambiguousDNA())
Use the Bio.Motif.parse(handle, format) function if you want
to read multiple records from the handle.
"""
iterator = parse(handle, format)
try:
first = iterator.next()
except StopIteration:
first = None
if first is None:
motifs = parse(handle, format)
if len(motifs)==0:
raise ValueError("No motifs found in handle")
try:
second = iterator.next()
except StopIteration:
second = None
if second is not None:
if len(motifs) > 1:
raise ValueError("More than one motif found in handle")
return first
motif = motifs[0]
return motif


if __name__ == "__main__":
Expand Down
6 changes: 5 additions & 1 deletion Doc/Tutorial.tex
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@

\author{Jeff Chang, Brad Chapman, Iddo Friedberg, Thomas Hamelryck, \\
Michiel de Hoon, Peter Cock, Tiago Antao, Eric Talevich, Bartek Wilczy\'{n}ski}
\date{Last Update -- 29 December 2012 (Biopython 1.60+)}
\date{Last Update -- 3 January 2013 (Biopython 1.60+)}

%Hack to get the logo at the start of the HTML front page:
%(hopefully this isn't going to be too wide for most people)
Expand Down Expand Up @@ -11026,6 +11026,10 @@ \subsubsection*{JASPAR}
GCCCATATATGG
\end{verbatim}

\subsubsection*{MEME}

\subsubsection*{TRANSFAC}

\subsection{Writing motifs}
Speaking of exporting, let's look at export functions. We can export to fasta:
%cont-doctest
Expand Down
Loading

0 comments on commit 4e68a7e

Please sign in to comment.