Skip to content

Commit

Permalink
Use background as an argument to mean, std, distribution
Browse files Browse the repository at this point in the history
  • Loading branch information
Michiel de Hoon authored and Michiel de Hoon committed Jan 13, 2013
1 parent fa41c9c commit 012c852
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 35 deletions.
56 changes: 28 additions & 28 deletions Bio/Motif/Matrix.py
Expand Up @@ -264,9 +264,7 @@ def log_odds(self, background=None):
values = {}
alphabet = self.alphabet
if background is None:
background = {}
for letter in alphabet.letters:
background[letter] = 1.0
background = dict.fromkeys(self._letters, 1.0)
else:
background = dict(background)
total = sum(background.values())
Expand Down Expand Up @@ -296,7 +294,6 @@ def log_odds(self, background=None):
logodds = float("nan")
values[letter].append(logodds)
pssm = PositionSpecificScoringMatrix(alphabet, values)
pssm._background = background
return pssm


Expand Down Expand Up @@ -386,14 +383,15 @@ def min(self):
score += min([self[letter][position] for letter in letters])
return score

@property
def mean(self):
"""Expected value of the score of a motif.
returns None if the expected value is undefined"""
if self._background is None:
return None
background = self._background
def mean(self, background=None):
"""Expected value of the score of a motif."""
if background is None:
background = dict.fromkeys(self._letters, 1.0)
else:
background = dict(background)
total = sum(background.values())
for letter in self._letters:
background[letter] /= total
sx = 0.0
for i in range(self.length):
for letter in self._letters:
Expand All @@ -402,16 +400,16 @@ def mean(self):
p = b * math.pow(2,logodds)
sx += p * logodds
return sx


@property
def std(self):
"""Standard deviation of the score of a motif.
returns None if the standard deviation is undefined"""
if self._background is None:
return None
background = self._background
def std(self, background=None):
"""Standard deviation of the score of a motif."""
if background is None:
background = dict.fromkeys(self._letters, 1.0)
else:
background = dict(background)
total = sum(background.values())
for letter in self._letters:
background[letter] /= total
variance = 0.0
for i in range(self.length):
sx = 0.0
Expand Down Expand Up @@ -472,12 +470,14 @@ def dist_pearson_at(self, other, offset):
denominator = math.sqrt((sxx-sx*sx)*(syy-sy*sy))
return numerator/denominator

def distribution(self, precision=10**3):
"""calculate the distribution of the scores at the given precision
returns None if the distribution is undefined"""
background = self._background
if background==None:
return None
def distribution(self, background=None, precision=10**3):
"""calculate the distribution of the scores at the given precision."""
from Thresholds import ScoreDistribution
if background is None:
background = dict.fromkeys(self._letters, 1.0)
else:
background = dict(background)
total = sum(background.values())
for letter in self._letters:
background[letter] /= total
return ScoreDistribution(precision=precision, pssm=self, background=background)
2 changes: 1 addition & 1 deletion Bio/Motif/Thresholds.py
Expand Up @@ -24,7 +24,7 @@ def __init__(self, motif=None, precision=10**3, pssm=None, background=None):
self.min_score = min(0.0, pssm.min)
self.interval = max(0.0, pssm.max)-self.min_score
self.n_points = precision * pssm.length
self.ic = pssm.mean
self.ic = pssm.mean(background)
self.step = self.interval/(self.n_points-1)
self.mo_density = [0.0]*self.n_points
self.mo_density[-self._index_diff(self.min_score)] = 1.0
Expand Down
14 changes: 8 additions & 6 deletions Doc/Tutorial.tex
Expand Up @@ -11240,7 +11240,8 @@ \section{Position-Specific Scoring Matrices}
For example, against a background with a 40\% GC content, use
%cont-doctest
\begin{verbatim}
>>> pssm = pwm.log_odds(background={'A':0.3,'C':0.2,'G':0.2,'T':0.3})
>>> background = {'A':0.3,'C':0.2,'G':0.2,'T':0.3}
>>> pssm = pwm.log_odds(background)
>>> print pssm
0 1 2 3 4
A: 0.42 1.49 -2.17 -0.05 -0.75
Expand All @@ -11260,15 +11261,16 @@ \section{Position-Specific Scoring Matrices}
-10.85
\end{verbatim}

Similarly, the mean and standard deviation of the PSSM scores are stored in
the \verb+.mean+ and \verb+.std+ attributes:
The mean and standard deviation of the PSSM scores with respect to a specific
background are calculated by the \verb+.mean+ and \verb+.std+ methods.
%cont-doctest
\begin{verbatim}
>>> mean = pssm.mean
>>> std = pssm.std
>>> mean = pssm.mean(background)
>>> std = pssm.std(background)
>>> print "mean = %0.2f, standard deviation = %0.2f" % (mean, std)
mean = 3.21, standard deviation = 2.59
\end{verbatim}
A uniform background is used if \verb+background+ is not specified.
The mean is particularly important, as its value is equal to the
Kullback-Leibler divergence or relative entropy, and is a measure for the
information content of the motif compared to the background. As in Biopython
Expand Down Expand Up @@ -11374,7 +11376,7 @@ \subsection{Selecting a score threshold}
approximation with a given precision to keep computation cost manageable:
%cont-doctest
\begin{verbatim}
>>> distribution = pssm.distribution(precision=10**4)
>>> distribution = pssm.distribution(background=background, precision=10**4)
\end{verbatim}
The \verb+distribution+ object can be used to determine a number of different thresholds.
We can specify the requested false-positive rate (probability of ``finding'' a motif instance in background generated sequence):
Expand Down

0 comments on commit 012c852

Please sign in to comment.