Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Updating motif export

  • Loading branch information...
commit a34a2ca9feb52fc140a60ce92f93764b9607e20e 1 parent 84e56bd
mdehoon authored
View
14 Bio/Motif/Jaspar.py
@@ -36,3 +36,17 @@ def read(handle, format):
raise ValueError("Unknown format %s" % format)
motif.mask = "*"*motif.length
return motif
+
+def write(motif):
+ """Returns the pfm representation of the motif
+ """
+ letters = "ACGT"
+ counts = motif.counts
+ lines = []
+ for letter in letters:
+ terms = map(str, counts[letter])
+ line = "\t".join(terms) + "\n"
+ lines.append(line)
+ # Finished; glue the lines together
+ text = "".join(lines)
+ return text
View
123 Bio/Motif/TRANSFAC.py
@@ -83,30 +83,8 @@ class Record(list):
def __init__(self):
self.version = None
- @property
- def motifs(self):
- import warnings
- warnings.warn("""\
-The .motifs attribute is now obsolete, and will be deprecated and removed
-in a future release of Biopython. This class now inherits from list, so
-instead of record.motifs[i], please use record[i].
-""", PendingDeprecationWarning)
- return self
-
def __str__(self):
- blocks = []
- if self.version is not None:
- block = """\
-VV %s
-XX
-//
-""" % self.version
- blocks.append(block)
- for motif in self:
- block = str(motif)
- blocks.append(block)
- text = "".join(blocks)
- return text
+ return write(self)
def read(handle):
@@ -164,3 +142,102 @@ def read(handle):
else:
annotations[key] = value
return record
+
+def write(motifs):
+ """Returns the representation of the motifs in TRANSFAC format
+ """
+ blocks = []
+ try:
+ version = motifs.version
+ except AttributeError:
+ pass
+ else:
+ if version is not None:
+ block = """\
+VV %s
+XX
+//
+""" % version
+ blocks.append(block)
+ multiple_value_keys = Motif.multiple_value_keys
+ sections = (('AC', 'AS',), # Accession
+ ('ID',), # ID
+ ('DT', 'CO'), # Date, copyright
+ ('NA',), # Name
+ ('DE',), # Short factor description
+ ('TY',), # Type
+ ('OS', 'OC'), # Organism
+ ('HP', 'HC'), # Superfamilies, subfamilies
+ ('BF',), # Binding factors
+ ('P0',), # Frequency matrix
+ ('BA',), # Statistical basis
+ ('BS',), # Factor binding sites
+ ('CC',), # Comments
+ ('DR',), # External databases
+ ('OV', 'PV',), # Versions
+ )
+ for motif in motifs:
+ lines = []
+ for section in sections:
+ blank = False
+ for key in section:
+ if key=='P0':
+ # Frequency matrix
+ length = motif.length
+ if length==0:
+ continue
+ sequence = motif.degenerate_consensus
+ line = "P0 A C G T"
+ lines.append(line)
+ for i in range(length):
+ line = "%02.d %6.20g %6.20g %6.20g %6.20g %s" % (
+ i+1,
+ motif.counts['A'][i],
+ motif.counts['C'][i],
+ motif.counts['G'][i],
+ motif.counts['T'][i],
+ sequence[i],
+ )
+ lines.append(line)
+ blank = True
+ else:
+ try:
+ value = motif.get(key)
+ except AttributeError:
+ value = None
+ if value is not None:
+ if key in multiple_value_keys:
+ for v in value:
+ line = "%s %s" % (key, v)
+ lines.append(line)
+ else:
+ line = "%s %s" % (key, value)
+ lines.append(line)
+ blank = True
+ if key=='PV':
+ # References
+ try:
+ references = motif.references
+ except AttributeError:
+ pass
+ else:
+ keys = ("RN", "RX", "RA", "RT", "RL")
+ for reference in references:
+ for key in keys:
+ value = reference.get(key)
+ if value is None:
+ continue
+ line = "%s %s" % (key, value)
+ lines.append(line)
+ blank = True
+ if blank:
+ line = 'XX'
+ lines.append(line)
+ # Finished this motif; glue the lines together
+ line = "//"
+ lines.append(line)
+ block = "\n".join(lines) + "\n"
+ blocks.append(block)
+ # Finished all motifs; glue the blocks together
+ text = "".join(blocks)
+ return text
View
138 Bio/Motif/__init__.py
@@ -521,105 +521,6 @@ def weblogo(self,fname,format="PNG",version="2.8.2", **kwds):
f.write(im)
f.close()
- def _to_transfac(self):
- """Write the representation of a motif in TRANSFAC format
- """
- from Bio.Motif import TRANSFAC
- multiple_value_keys = TRANSFAC.Motif.multiple_value_keys
- sections = (('AC', 'AS',), # Accession
- ('ID',), # ID
- ('DT', 'CO'), # Date, copyright
- ('NA',), # Name
- ('DE',), # Short factor description
- ('TY',), # Type
- ('OS', 'OC'), # Organism
- ('HP', 'HC'), # Superfamilies, subfamilies
- ('BF',), # Binding factors
- ('P0',), # Frequency matrix
- ('BA',), # Statistical basis
- ('BS',), # Factor binding sites
- ('CC',), # Comments
- ('DR',), # External databases
- ('OV', 'PV',), # Versions
- )
- lines = []
- for section in sections:
- blank = False
- for key in section:
- if key=='P0':
- # Frequency matrix
- length = self.length
- if length==0:
- continue
- sequence = self.degenerate_consensus
- line = "P0 A C G T"
- lines.append(line)
- for i in range(length):
- line = "%02.d %6.20g %6.20g %6.20g %6.20g %s" % (
- i+1,
- self.counts['A'][i],
- self.counts['C'][i],
- self.counts['G'][i],
- self.counts['T'][i],
- sequence[i],
- )
- lines.append(line)
- blank = True
- else:
- try:
- value = self.get(key)
- except AttributeError:
- value = None
- if value is not None:
- if key in multiple_value_keys:
- for v in value:
- line = "%s %s" % (key, v)
- lines.append(line)
- else:
- line = "%s %s" % (key, value)
- lines.append(line)
- blank = True
- if key=='PV':
- # References
- try:
- references = self.references
- except AttributeError:
- pass
- else:
- keys = ("RN", "RX", "RA", "RT", "RL")
- for reference in references:
- for key in keys:
- value = reference.get(key)
- if value is None:
- continue
- line = "%s %s" % (key, value)
- lines.append(line)
- blank = True
- if blank:
- line = 'XX'
- lines.append(line)
- # Finished; glue the lines together
- line = "//"
- lines.append(line)
- text = "\n".join(lines) + "\n"
- return text
-
- def _to_jaspar_pfm(self):
- """Returns the pfm representation of the motif
- """
- letters = "ACGT"
- counts = self.counts
- length = self.length
- lines = []
- for letter in letters:
- terms = [str(counts[letter][i]) for i in range(length)]
- line = "\t".join(terms) + "\n"
- lines.append(line)
- # Finished; glue the lines together
- text = "".join(lines)
- return text
-
-
def format(self,format):
"""Returns a string representation of the Motif in a given format
@@ -628,15 +529,38 @@ def format(self,format):
- transfac : TRANSFAC like files
"""
- formatters={
- "pfm": self._to_jaspar_pfm,
- "transfac": self._to_transfac,
- }
+ if format=="pfm":
+ from Bio.Motif import Jaspar
+ return Jaspar.write(self)
+ elif format=="transfac":
+ from Bio.Motif import TRANSFAC
+ motifs = [self]
+ return TRANSFAC.write(motifs)
+ else:
+ raise ValueError("Unknown format type %s" % format)
+
+
+def write(motifs, format):
+ """Returns a string representation of motifs in a given format
+
+ Currently supported formats:
+ - pfm : JASPAR Position Frequency Matrix
+ [only if len(motifs)==1]
+ - transfac : TRANSFAC like files
+ """
+
+ if format=="pfm":
+ from Bio.Motif import Jaspar
+ if len(motifs)!=1:
+ raise Exception("Only a single motif can be written in the JASPAR Position Frequency Matrix (pfm) format")
+ motif = motifs[0]
+ return Jaspar.write(motif)
+ elif format=="transfac":
+ from Bio.Motif import TRANSFAC
+ return TRANSFAC.write(motifs)
+ else:
+ raise ValueError("Unknown format type %s" % format)
- try:
- return formatters[format]()
- except KeyError:
- raise ValueError("Wrong format type")
NewMotif = Motif
from Bio.Motif._Motif import Motif
View
175 Doc/Tutorial.tex
@@ -80,7 +80,7 @@
\author{Jeff Chang, Brad Chapman, Iddo Friedberg, Thomas Hamelryck, \\
Michiel de Hoon, Peter Cock, Tiago Antao, Eric Talevich, Bartek Wilczy\'{n}ski}
-\date{Last Update -- 3 January 2013 (Biopython 1.60+)}
+\date{Last Update -- 15 January 2013 (Biopython 1.60+)}
%Hack to get the logo at the start of the HTML front page:
%(hopefully this isn't going to be too wide for most people)
@@ -11155,9 +11155,57 @@ \subsubsection*{TRANSFAC}
//
\end{verbatim}
This file shows the frequency matrix of motif \verb+motif1+ of 12 nucleotides.
+In general, one file in the TRANSFAC format can contain multiple motifs. For
+example, this is the contents of the example TRANSFAC file \verb+transfac.dat+:
+\begin{verbatim}
+VV EXAMPLE January 15, 2013
+XX
+//
+ID motif1
+P0 A C G T
+01 1 2 2 0 S
+02 2 1 2 0 R
+03 3 0 1 1 A
+...
+11 0 2 0 3 Y
+12 1 0 3 1 G
+//
+ID motif2
+P0 A C G T
+01 2 1 2 0 R
+02 1 2 2 0 S
+...
+09 0 0 0 5 T
+10 0 2 0 3 Y
+//
+\end{verbatim}
+To parse a TRANSFAC file, use
+%cont-doctest
+\begin{verbatim}
+>>> handle = open("transfac.dat")
+>>> motifs = Motif.parse(handle, "TRANSFAC")
+>>> handle.close()
+\end{verbatim}
+The overall version number, if available, is stored as \verb+motifs.version+:
+%cont-doctest
+\begin{verbatim}
+>>> motifs.version
+'EXAMPLE January 15, 2013'
+\end{verbatim}
+
+Each motif in \verb+motifs+ is an instance of the \verb+Bio.Motif.TRANSFAC.Motif+ class, which inherits both from the \verb+Bio.Motif.Motif+ class and from a Python dictionary. The dictionary uses the two-letter keys to store any additional information about the motif:
+%cont-doctest
+\begin{verbatim}
+>>> motif = motifs[0]
+>>> motif.degenerate_consensus # Using the Bio.Motif.Motif method
+Seq('SRACAGGTGKYG', IUPACAmbiguousDNA())
+>>> motif['ID'] # Using motif as a dictionary
+'motif1'
+\end{verbatim}
-TRANSFAC files are typically much more elaborate than this example, containing lots of additional information about the motif. Two-letter keys are used to store this information:
+TRANSFAC files are typically much more elaborate than this example, containing lots of additional information about the motif. Table \ref{table:transfaccodes} lists the two-letter field codes that are commonly found in TRANSFAC files:
\begin{table}[h]
\begin{center}
\caption{Fields commonly found in TRANSFAC files}
+\label{table:transfaccodes}
\begin{tabular}{|l|l||}
@@ -11185,24 +11233,6 @@ \subsubsection*{TRANSFAC}
\end{center}
\end{table}
-To parse a TRANSFAC file, use
-%cont-doctest
-\begin{verbatim}
->>> handle = open("transfac.dat")
->>> record = Motif.parse(handle, "TRANSFAC")
->>> handle.close()
-\end{verbatim}
-The motifs are stored as the list \verb+record.motifs+. The overall version number, if available, is stored as \verb+record.version+.
-
-Each motif in \verb+record.motifs+ is in instance of the \verb+Bio.Motif.TRANSFAC.Motif+ class, which inherits both from the \verb+Bio.Motif.Motif+ class and from a Python dictionary. The dictionary uses the two-letter keys to store any additional information about the motif:
-%cont-doctest
-\begin{verbatim}
->>> motif = record.motifs[0]
->>> motif.degenerate_consensus # Using the Bio.Motif.Motif method
-Seq('SRACAGGTGKYG', IUPACAmbiguousDNA())
->>> motif['ID'] # Using motif as a dictionary
-'motif1'
-\end{verbatim}
Each motif also has an attribute \verb+.references+ containing the references associated with the motif, using these two-letter keys:
\begin{table}[h]
@@ -11218,9 +11248,70 @@ \subsubsection*{TRANSFAC}
\end{center}
\end{table}
+Printing the motifs writes them out in their native TRANSFAC format:
+%cont-doctest
+\begin{verbatim}
+>>> print motifs
+VV EXAMPLE January 15, 2013
+XX
+//
+ID motif1
+XX
+P0 A C G T
+01 1 2 2 0 S
+02 2 1 2 0 R
+03 3 0 1 1 A
+04 0 5 0 0 C
+05 5 0 0 0 A
+06 0 0 4 1 G
+07 0 1 4 0 G
+08 0 0 0 5 T
+09 0 0 5 0 G
+10 0 1 2 2 K
+11 0 2 0 3 Y
+12 1 0 3 1 G
+XX
+//
+ID motif2
+XX
+P0 A C G T
+01 2 1 2 0 R
+02 1 2 2 0 S
+03 0 5 0 0 C
+04 3 0 1 1 A
+05 0 0 4 1 G
+06 5 0 0 0 A
+07 0 1 4 0 G
+08 0 0 5 0 G
+09 0 0 0 5 T
+10 0 2 0 3 Y
+XX
+//
+<BLANKLINE>
+\end{verbatim}
+You can export the motifs in the TRANSFAC format by capturing this output
+in a string and saving it in a file:
+\begin{verbatim}
+>>> text = str(motifs)
+>>> handle = open("mytransfacfile.dat", 'w')
+>>> handle.write(text)
+>>> handle.close()
+\end{verbatim}
+
\subsection{Writing motifs}
-Speaking of exporting, let's look at export functions. We can export to
-a TRANSFAC-like matrix format (used by some motif processing software)
+Speaking of exporting, let's look at export functions in general.
+To export a motif in the JASPAR \verb+.pfm+ format, use
+%the tabs in the output confuse doctest; don't test
+\begin{verbatim}
+>>> print m.format("pfm")
+3 7 0 2 1
+0 0 5 2 6
+0 0 0 3 0
+4 0 2 0 0
+<BLANKLINE>
+\end{verbatim}
+
+To write the motif in a TRANSFAC-like matrix format, use
%cont-doctest
\begin{verbatim}
>>> print m.format("transfac")
@@ -11235,7 +11326,41 @@ \subsection{Writing motifs}
<BLANKLINE>
\end{verbatim}
-Finally, if we have internet access, we can create a \href{http://weblogo.berkeley.edu}{weblogo}:
+To write out multiple motifs, you can use \verb+Motif.write+.
+This function can be used regardless of whether the motifs originated from a TRANSFAC file. For example,
+%cont-doctest
+\begin{verbatim}
+>>> motifs = [arnt, srf]
+>>> print Motif.write(motifs, 'transfac')
+P0 A C G T
+01 4 16 0 0 C
+02 19 0 1 0 A
+03 0 20 0 0 C
+04 0 0 20 0 G
+05 0 0 0 20 T
+06 0 0 20 0 G
+XX
+//
+P0 A C G T
+01 2 1 39 4 G
+02 9 33 2 2 C
+03 0 45 1 0 C
+04 1 45 0 0 C
+05 32 1 0 13 A
+06 3 1 0 42 T
+07 46 0 0 0 A
+08 1 0 0 45 T
+09 43 0 0 3 A
+10 15 1 0 30 T
+11 2 0 44 0 G
+12 2 1 43 0 G
+XX
+//
+<BLANKLINE>
+\end{verbatim}
+
+\subsection{Creating a sequence logo}
+If we have internet access, we can create a \href{http://weblogo.berkeley.edu}{weblogo}:
\begin{verbatim}
>>> arnt.weblogo("Arnt.png")
\end{verbatim}
@@ -11514,6 +11639,10 @@ \subsection{Selecting a score threshold}
Position -6: score = 4.601
\end{verbatim}
+\section{Each motif object has an associated Position-Specific Scoring Matrix}
+
+To make things easier.
+
\section{Comparing motifs}
\label{sec:comp}
Once we have more than one motif, we might want to compare them. For
View
3  Tests/Motif/transfac.dat
@@ -1,3 +1,6 @@
+VV EXAMPLE January 15, 2013
+XX
+//
ID motif1
P0 A C G T
01 1 2 2 0 S
Please sign in to comment.
Something went wrong with that request. Please try again.