Skip to content

Commit

Permalink
Merge pull request #511 from jamesmartini/develop_entropy508
Browse files Browse the repository at this point in the history
Relative entropy methods and testing for MotifFreqsArray objects
  • Loading branch information
GavinHuttley committed Jan 31, 2020
2 parents a6b82a0 + 3218c71 commit 53dac16
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 1 deletion.
51 changes: 50 additions & 1 deletion src/cogent3/core/profile.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import numpy

from numpy import digitize
from numpy import digitize, array
from numpy.random import random
from numpy.testing import assert_allclose

from cogent3.maths.util import safe_log, safe_p_log_p
from cogent3.util.dict_array import DictArray, DictArrayTemplate
Expand Down Expand Up @@ -292,6 +293,54 @@ def entropy(self):
result = self.entropy_terms().row_sum()
return result.array

def relative_entropy_terms(self, background=None):
    """
    Return the per-motif relative entropy terms of every row.

    Each term is ``background * (log(background) - log(self.array))``,
    with the background vector applied to each row of ``self.array``.

    Parameters
    ----------
    background : dict or None
        Background distribution as {motif_1: prob_1, motif_2: prob_2, ...}.
        Motifs of this object that are absent from the dict are given a
        background probability of 0.

    Returns
    -------
    DictArray
        The terms wrapped with this object's template.

    Raises
    ------
    ValueError
        Presumably raised by validate_freqs_array when the background is
        not a valid frequency vector (the accompanying tests exercise a
        negative value and a background whose values do not sum to 1).

    Notes
    -----
    When background is None an equifrequent distribution over
    ``self.motifs`` is used.
    NOTE(review): safe_log is defined elsewhere; the accompanying tests are
    consistent with base-2 logs and log(0) being treated as 0 -- confirm.
    """
    motifs = self.motifs
    if background is not None:
        # order the supplied probabilities to match this object's motifs
        bg_freqs = array([background.get(motif, 0) for motif in motifs])
    else:
        count = len(motifs)
        bg_freqs = array([1 / count] * count)

    validate_freqs_array(bg_freqs)
    log_difference = safe_log(bg_freqs) - safe_log(self.array)
    return self.template.wrap(bg_freqs * log_difference)

def relative_entropy(self, background=None):
    """
    Return the relative entropy of each row.

    The value for a row is the sum of that row's terms as produced by
    ``relative_entropy_terms``.

    Parameters
    ----------
    background : dict or None
        Background distribution as {motif_1: prob_1, motif_2: prob_2, ...}.
        It is passed unchanged to ``relative_entropy_terms``.

    Returns
    -------
    array
        One value per row.

    Notes
    -----
    When background is None, ``relative_entropy_terms`` falls back to an
    equifrequent background.
    """
    terms = self.relative_entropy_terms(background=background)
    row_totals = terms.row_sum()
    return row_totals.array

def information(self):
"""returns information as -max_entropy - entropy"""
n = self.shape[1]
Expand Down
31 changes: 31 additions & 0 deletions tests/test_core/test_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,37 @@ def test_entropy(self):
entropy = got.entropy()
assert_allclose(entropy, [2, 1])

def test_relative_entropy_terms(self):
    """relative_entropy_terms handles default, explicit and invalid backgrounds"""
    freqs = MotifFreqsArray(
        array([[0.25, 0.25, 0.25, 0.25], [0.5, 0.5, 0, 0]]), "ABCD"
    )

    # background=None falls back to the equifrequent distribution
    terms = freqs.relative_entropy_terms(background=None)
    assert_allclose(terms, [[0, 0, 0, 0], [-0.25, -0.25, -0.5, -0.5]])

    # explicit, non-uniform background
    bg = {"A": 0.5, "B": 0.25, "C": 0.125, "D": 0.125}
    terms = freqs.relative_entropy_terms(background=bg)
    assert_allclose(
        terms, [[0.5, 0, -0.125, -0.125], [0, -0.25, -0.375, -0.375]]
    )

    # a background containing a negative value is rejected
    with self.assertRaises(ValueError):
        freqs.relative_entropy_terms(background={"A": -0.5, "B": 1.5})

    # a background that omits a motif (values sum to 0.875) is rejected
    with self.assertRaises(ValueError):
        freqs.relative_entropy_terms(
            background={"A": 0.5, "B": 0.25, "C": 0.125}
        )

def test_relative_entropy(self):
    """relative_entropy gives the expected value for each row"""
    freqs = MotifFreqsArray(
        array([[0.25, 0.25, 0.25, 0.25], [0.5, 0.5, 0, 0]]), "ABCD"
    )

    # no background supplied
    assert_allclose(freqs.relative_entropy(background=None), [0, -1.5])

    # explicit, non-uniform background
    bg = {"A": 0.5, "B": 0.25, "C": 0.125, "D": 0.125}
    assert_allclose(freqs.relative_entropy(background=bg), [0.25, -1])

def test_information(self):
"""calculates entr0pies correctly"""
data = [[0.25, 0.25, 0.25, 0.25], [0.5, 0.5, 0, 0]]
Expand Down

0 comments on commit 53dac16

Please sign in to comment.