Permalink
Browse files

Use background as an argument to mean, std, distribution

  • Loading branch information...
1 parent fa41c9c commit 012c852179c8f85ec4b7095d6b5ef90770d574cd Michiel de Hoon committed Jan 13, 2013
Showing with 37 additions and 35 deletions.
  1. +28 −28 Bio/Motif/Matrix.py
  2. +1 −1 Bio/Motif/Thresholds.py
  3. +8 −6 Doc/Tutorial.tex
View
@@ -264,9 +264,7 @@ def log_odds(self, background=None):
values = {}
alphabet = self.alphabet
if background is None:
- background = {}
- for letter in alphabet.letters:
- background[letter] = 1.0
+ background = dict.fromkeys(self._letters, 1.0)
else:
background = dict(background)
total = sum(background.values())
@@ -296,7 +294,6 @@ def log_odds(self, background=None):
logodds = float("nan")
values[letter].append(logodds)
pssm = PositionSpecificScoringMatrix(alphabet, values)
- pssm._background = background
return pssm
@@ -386,14 +383,15 @@ def min(self):
score += min([self[letter][position] for letter in letters])
return score
- @property
- def mean(self):
- """Expected value of the score of a motif.
-
- returns None if the expected value is undefined"""
- if self._background is None:
- return None
- background = self._background
+ def mean(self, background=None):
+ """Expected value of the score of a motif."""
+ if background is None:
+ background = dict.fromkeys(self._letters, 1.0)
+ else:
+ background = dict(background)
+ total = sum(background.values())
+ for letter in self._letters:
+ background[letter] /= total
sx = 0.0
for i in range(self.length):
for letter in self._letters:
@@ -402,16 +400,16 @@ def mean(self):
p = b * math.pow(2,logodds)
sx += p * logodds
return sx
-
- @property
- def std(self):
- """Standard deviation of the score of a motif.
-
- returns None if the standard deviation is undefined"""
- if self._background is None:
- return None
- background = self._background
+ def std(self, background=None):
+ """Standard deviation of the score of a motif."""
+ if background is None:
+ background = dict.fromkeys(self._letters, 1.0)
+ else:
+ background = dict(background)
+ total = sum(background.values())
+ for letter in self._letters:
+ background[letter] /= total
variance = 0.0
for i in range(self.length):
sx = 0.0
@@ -472,12 +470,14 @@ def dist_pearson_at(self, other, offset):
denominator = math.sqrt((sxx-sx*sx)*(syy-sy*sy))
return numerator/denominator
- def distribution(self, precision=10**3):
- """calculate the distribution of the scores at the given precision
-
- returns None if the distribution is undefined"""
- background = self._background
- if background==None:
- return None
+ def distribution(self, background=None, precision=10**3):
+ """calculate the distribution of the scores at the given precision."""
from Thresholds import ScoreDistribution
+ if background is None:
+ background = dict.fromkeys(self._letters, 1.0)
+ else:
+ background = dict(background)
+ total = sum(background.values())
+ for letter in self._letters:
+ background[letter] /= total
return ScoreDistribution(precision=precision, pssm=self, background=background)
View
@@ -24,7 +24,7 @@ def __init__(self, motif=None, precision=10**3, pssm=None, background=None):
self.min_score = min(0.0, pssm.min)
self.interval = max(0.0, pssm.max)-self.min_score
self.n_points = precision * pssm.length
- self.ic = pssm.mean
+ self.ic = pssm.mean(background)
self.step = self.interval/(self.n_points-1)
self.mo_density = [0.0]*self.n_points
self.mo_density[-self._index_diff(self.min_score)] = 1.0
View
@@ -11240,7 +11240,8 @@ \section{Position-Specific Scoring Matrices}
For example, against a background with a 40\% GC content, use
%cont-doctest
\begin{verbatim}
->>> pssm = pwm.log_odds(background={'A':0.3,'C':0.2,'G':0.2,'T':0.3})
+>>> background = {'A':0.3,'C':0.2,'G':0.2,'T':0.3}
+>>> pssm = pwm.log_odds(background)
>>> print pssm
0 1 2 3 4
A: 0.42 1.49 -2.17 -0.05 -0.75
@@ -11260,15 +11261,16 @@ \section{Position-Specific Scoring Matrices}
-10.85
\end{verbatim}
-Similarly, the mean and standard deviation of the PSSM scores are stored in
-the \verb+.mean+ and \verb+.std+ attributes:
+The mean and standard deviation of the PSSM scores with respect to a specific
+background are calculated by the \verb+.mean+ and \verb+.std+ methods.
%cont-doctest
\begin{verbatim}
->>> mean = pssm.mean
->>> std = pssm.std
+>>> mean = pssm.mean(background)
+>>> std = pssm.std(background)
>>> print "mean = %0.2f, standard deviation = %0.2f" % (mean, std)
mean = 3.21, standard deviation = 2.59
\end{verbatim}
+A uniform background is used if \verb+background+ is not specified.
The mean is particularly important, as its value is equal to the
Kullback-Leibler divergence or relative entropy, and is a measure for the
information content of the motif compared to the background. As in Biopython
@@ -11374,7 +11376,7 @@ \subsection{Selecting a score threshold}
approximation with a given precision to keep computation cost manageable:
%cont-doctest
\begin{verbatim}
->>> distribution = pssm.distribution(precision=10**4)
+>>> distribution = pssm.distribution(background=background, precision=10**4)
\end{verbatim}
The \verb+distribution+ object can be used to determine a number of different thresholds.
We can specify the requested false-positive rate (probability of ``finding'' a motif instance in background generated sequence):

0 comments on commit 012c852

Please sign in to comment.