From 9f692e3d39b7bead111c606ad77d1301ab74f104 Mon Sep 17 00:00:00 2001
From: Ryan James
Date: Sun, 22 May 2016 20:54:33 -0700
Subject: [PATCH 1/9] start work

---
 .../functional_common_information.py          | 63 +++++++++++++++++++
 .../test_functional_common_information.py     |  0
 2 files changed, 63 insertions(+)
 create mode 100644 dit/multivariate/functional_common_information.py
 create mode 100644 dit/multivariate/tests/test_functional_common_information.py

diff --git a/dit/multivariate/functional_common_information.py b/dit/multivariate/functional_common_information.py
new file mode 100644
index 0000000000..980b600f6f
--- /dev/null
+++ b/dit/multivariate/functional_common_information.py
@@ -0,0 +1,63 @@
+"""
+The functional common information.
+"""
+
+from ..distconst import insert_rvf
+from ..helpers import flatten, normalize_rvs
+from ..math import close
+from ..utils import partitions
+
+from .entropy import entropy
+from .binding_information import dual_total_correlation
+
+def add_partition(dist, part):
+    invert_part = {e: str(i) for i, es in enumerate(part) for e in es}
+    dist = insert_rvf(dist, lambda j: invert_part[j])
+    return dist
+
+def functional_markov_chain(dist, rvs=None, crvs=None, rv_mode=None):
+    rvs, crvs, rv_mode = normalize_rvs(dist, rvs, crvs, rv_mode)
+    outcomes = dist.outcomes
+    f = [len(dist.rvs)]
+    parts = partitions(outcomes, tuples=True)
+    dists = [ add_partition(dist, part) for part in parts ]
+
+    B = lambda d: dual_total_correlation(d, rvs, crvs+f, rv_mode)
+
+    dists = [ d for d in dists if close(B(d), 0) ]
+    return min(dists, key=lambda d: entropy(d, rvs=f, rv_mode=rv_mode))
+
+def functional_common_information(dist, rvs=None, crvs=None, rv_mode=None):
+    """
+    Compute the functional common information, F, of `dist`. It is the entropy
+    of the smallest random variable W such that all the variables in `rvs` are
+    rendered independent conditioned on W, and W is a function of `rvs`.
+
+    Parameters
+    ----------
+    dist : Distribution
+        The distribution from which the functional common information is
+        computed.
+    rvs : list, None
+        A list of lists. Each inner list specifies the indexes of the random
+        variables used to calculate the functional common information. If
+        None, then it is calculated over all random variables, which is
+        equivalent to passing `rvs=dist.rvs`.
+    crvs : list, None
+        A single list of indexes specifying the random variables to condition
+        on. If None, then no variables are conditioned on.
+    rv_mode : str, None
+        Specifies how to interpret `rvs` and `crvs`. Valid options are:
+        {'indices', 'names'}. If equal to 'indices', then the elements of
+        `crvs` and `rvs` are interpreted as random variable indices. If equal
+        to 'names', then the elements are interpreted as random variable
+        names. If `None`, then the value of `dist._rv_mode` is consulted,
+        which defaults to 'indices'.
+
+    Returns
+    -------
+    F : float
+        The functional common information.
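+
+    Examples
+    --------
+    An illustrative run; the value below matches the test suite, where the
+    functional common information of the exclusive-or distribution equals
+    its full joint entropy of two bits:
+
+    >>> from dit import Distribution
+    >>> d = Distribution(['000', '011', '101', '110'], [1/4]*4)
+    >>> functional_common_information(d)
+    2.0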
+ """ + d = functional_markov_chain(dist, rvs, crvs, rv_mode) + return H(d.marginalize(list(flatten(dist.rvs)))) diff --git a/dit/multivariate/tests/test_functional_common_information.py b/dit/multivariate/tests/test_functional_common_information.py new file mode 100644 index 0000000000..e69de29bb2 From bbe5642b0edd3cb7742b02451491c94eb98fdfca Mon Sep 17 00:00:00 2001 From: Ryan James Date: Tue, 24 May 2016 22:09:55 -0700 Subject: [PATCH 2/9] more work --- dit/abc.py | 2 + dit/multivariate/__init__.py | 1 + .../functional_common_information.py | 59 +++++++++++++++++-- .../test_functional_common_information.py | 44 ++++++++++++++ 4 files changed, 102 insertions(+), 4 deletions(-) diff --git a/dit/abc.py b/dit/abc.py index 8a99009049..ec22fbc728 100644 --- a/dit/abc.py +++ b/dit/abc.py @@ -21,6 +21,7 @@ caekl_mutual_information as J, coinformation as I, entropy as H, + functional_common_information as F, gk_common_information as K, interaction_information as II, joint_mss_entropy as M, @@ -50,6 +51,7 @@ 'J', # the CAEKL common information 'K', # the Gacs-Korner common information [meet entropy] 'II', # the interaction information + 'F', # the functional common information 'M', # the joint minimal sufficient statistic entropy ] diff --git a/dit/multivariate/__init__.py b/dit/multivariate/__init__.py index fb8e37bb73..69fe8abb0e 100644 --- a/dit/multivariate/__init__.py +++ b/dit/multivariate/__init__.py @@ -7,6 +7,7 @@ from .caekl_mutual_information import caekl_mutual_information from .coinformation import coinformation from .entropy import entropy +from .functional_common_information import functional_common_information from .gk_common_information import gk_common_information from .interaction_information import interaction_information from .joint_mss import joint_mss_entropy diff --git a/dit/multivariate/functional_common_information.py b/dit/multivariate/functional_common_information.py index 980b600f6f..65e6321e38 100644 --- a/dit/multivariate/functional_common_information.py +++ b/dit/multivariate/functional_common_information.py @@ -10,21 +10,72 @@ from .entropy import entropy from .binding_information import dual_total_correlation +__all__ = ['functional_common_information'] + def add_partition(dist, part): + """ + Add a function of the joint distribution. + + Parameters + ---------- + dist : Distribution + The distribution to add a function to. + part : list of lists + A partition of the outcomes. Each outcome will be mapped to the id of + its partition element. + + Returns + ------- + dist : Distribution + The original `dist` with the function defined by `part` added. + """ invert_part = {e: str(i) for i, es in enumerate(part) for e in es} dist = insert_rvf(dist, lambda j: invert_part[j]) return dist def functional_markov_chain(dist, rvs=None, crvs=None, rv_mode=None): + """ + Add the smallest function of `dist` which renders `rvs` independent. + + Parameters + ---------- + dist : Distribution + The distribution for which the smallest function will be constructed. + rvs : list, None + A list of lists. Each inner list specifies the indexes of the random + variables used to calculate the total correlation. If None, then the + total correlation is calculated over all random variables, which is + equivalent to passing `rvs=dist.rvs`. + crvs : list, None + A single list of indexes specifying the random variables to condition + on. If None, then no variables are conditioned on. + rv_mode : str, None + Specifies how to interpret `rvs` and `crvs`. Valid options are: + {'indices', 'names'}. 
If equal to 'indices', then the elements of
+        `crvs` and `rvs` are interpreted as random variable indices. If equal
+        to 'names', then the elements are interpreted as random variable names.
+        If `None`, then the value of `dist._rv_mode` is consulted, which
+        defaults to 'indices'.
+
+    Returns
+    -------
+    d : Distribution
+        The distribution `dist` with the additional variable added to the end.
+    """
     rvs, crvs, rv_mode = normalize_rvs(dist, rvs, crvs, rv_mode)
     outcomes = dist.outcomes
     f = [len(dist.rvs)]
-    parts = partitions(outcomes, tuples=True)
-    dists = [ add_partition(dist, part) for part in parts ]
+    parts = partitions(outcomes)
+    dists = [ (add_partition(dist, part), part) for part in parts ]
 
     B = lambda d: dual_total_correlation(d, rvs, crvs+f, rv_mode)
 
-    dists = [ d for d in dists if close(B(d), 0) ]
+    dists = [ (d, p) for d, p in dists if close(B(d), 0) ]
+    return dists
+    dists = [(entropy(d, rvs=f, rv_mode=rv_mode), d) for d in dists ]
+    return dists
+    dists = [(h, d) for h, d in dists if h == min(h for h, d in dists)]
+    return dists
     return min(dists, key=lambda d: entropy(d, rvs=f, rv_mode=rv_mode))
 
 def functional_common_information(dist, rvs=None, crvs=None, rv_mode=None):
@@ -60,4 +111,4 @@ def functional_common_information(dist, rvs=None, crvs=None, rv_mode=None):
         The functional common information.
     """
     d = functional_markov_chain(dist, rvs, crvs, rv_mode)
-    return H(d.marginalize(list(flatten(dist.rvs))))
+    return entropy(d.marginalize(list(flatten(dist.rvs))))
diff --git a/dit/multivariate/tests/test_functional_common_information.py b/dit/multivariate/tests/test_functional_common_information.py
index e69de29bb2..fae04a5d72 100644
--- a/dit/multivariate/tests/test_functional_common_information.py
+++ b/dit/multivariate/tests/test_functional_common_information.py
@@ -0,0 +1,44 @@
+"""
+Tests for dit.multivariate.functional_common_information.
+"""
+
+from __future__ import division
+
+from nose.tools import assert_almost_equal, assert_less_equal
+
+from dit import Distribution, random_distribution
+from dit.multivariate import (functional_common_information as F,
+                              dual_total_correlation as B,
+                              joint_mss_entropy as M
+                             )
+
+def test_fci1():
+    """
+    Test known values.
+    """
+    d = Distribution(['000', '011', '101', '110'], [1/4]*4)
+    assert_almost_equal(F(d), 2.0)
+    assert_almost_equal(F(d, [[0], [1]]), 0.0)
+    assert_almost_equal(F(d, [[0], [1]], [2]), 1.0)
+
+def test_fci2():
+    """
+    Test known values w/ rv names.
+    """
+    d = Distribution(['000', '011', '101', '110'], [1/4]*4)
+    d.set_rv_names('XYZ')
+    assert_almost_equal(F(d), 2.0)
+    assert_almost_equal(F(d, [[0], [1]]), 0.0)
+    assert_almost_equal(F(d, [[0], [1]], [2]), 1.0)
+
+def test_fci3():
+    """
+    Test that B <= F <= M.
+ """ + dists = [ random_distribution(2, 2) for _ in range(10) ] + for d in dists: + b = B(d) + f = F(d) + m = M(d) + yield assert_less_equal, b, f + yield assert_less_equal, f, m From fcea626a508bdeec8d6ed08daa41526a9a941618 Mon Sep 17 00:00:00 2001 From: Ryan James Date: Tue, 24 May 2016 22:12:42 -0700 Subject: [PATCH 3/9] remove some testing code --- dit/multivariate/functional_common_information.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/dit/multivariate/functional_common_information.py b/dit/multivariate/functional_common_information.py index 65e6321e38..96b25fa10e 100644 --- a/dit/multivariate/functional_common_information.py +++ b/dit/multivariate/functional_common_information.py @@ -66,16 +66,9 @@ def functional_markov_chain(dist, rvs=None, crvs=None, rv_mode=None): outcomes = dist.outcomes f = [len(dist.rvs)] parts = partitions(outcomes) - dists = [ (add_partition(dist, part), part) for part in parts ] - + dists = [ add_partition(dist, part) for part in parts ] B = lambda d: dual_total_correlation(d, rvs, crvs+f, rv_mode) - - dists = [ (d, p) for d, p in dists if close(B(d), 0) ] - return dists - dists = [(entropy(d, rvs=f, rv_mode=rv_mode), d) for d in dists ] - return dists - dists = [(h, d) for h, d in dists if h == min(h for h, d in dists)] - return dists + dists = [ d for d in dists if close(B(d), 0) ] return min(dists, key=lambda d: entropy(d, rvs=f, rv_mode=rv_mode)) def functional_common_information(dist, rvs=None, crvs=None, rv_mode=None): From d6ec36775c907fc91bbbc13d5f56a7a8c62e9748 Mon Sep 17 00:00:00 2001 From: Ryan James Date: Thu, 26 May 2016 17:37:05 -0700 Subject: [PATCH 4/9] do some renaming, cleaning things up, starting docs --- dit/abc.py | 4 +- dit/multivariate/__init__.py | 7 +- ...formation.py => dual_total_correlation.py} | 22 +++-- .../functional_common_information.py | 95 ++++++++++++++++++- ...joint_mss.py => mss_common_information.py} | 10 +- ...tion.py => test_dual_total_correlation.py} | 6 +- .../test_functional_common_information.py | 19 +++- ..._mss.py => test_mss_common_information.py} | 2 +- docs/conf.py | 6 ++ .../multivariate/dual_total_correlation.rst | 4 +- .../functional_common_information.rst | 6 ++ .../multivariate/mss_common_information.rst | 6 ++ docs/measures/multivariate/multivariate.rst | 4 +- docs/references.bib | 12 ++- site/src/MathJax/local/dit.js | 9 ++ 15 files changed, 181 insertions(+), 31 deletions(-) rename dit/multivariate/{binding_information.py => dual_total_correlation.py} (82%) rename dit/multivariate/{joint_mss.py => mss_common_information.py} (73%) rename dit/multivariate/tests/{test_binding_information.py => test_dual_total_correlation.py} (93%) rename dit/multivariate/tests/{test_joint_mss.py => test_mss_common_information.py} (95%) create mode 100644 docs/measures/multivariate/functional_common_information.rst create mode 100644 docs/measures/multivariate/mss_common_information.rst diff --git a/dit/abc.py b/dit/abc.py index ec22fbc728..559c3ad81e 100644 --- a/dit/abc.py +++ b/dit/abc.py @@ -24,7 +24,7 @@ functional_common_information as F, gk_common_information as K, interaction_information as II, - joint_mss_entropy as M, + mss_common_information as M, residual_entropy as R, total_correlation as T, tse_complexity as TSE, @@ -41,7 +41,7 @@ 'H', # the joint conditional entropy 'I', # the multivariate conditional mututal information 'T', # the conditional total correlation [multi-information/integration] - 'B', # the conditional binding information [dual total correlation] + 'B', 
# the conditional dual total correlation [binding information] 'R', # the conditional residual entropy [erasure entropy] 'TSE', # the TSE complexity ] diff --git a/dit/multivariate/__init__.py b/dit/multivariate/__init__.py index 69fe8abb0e..b88c3f6ddf 100644 --- a/dit/multivariate/__init__.py +++ b/dit/multivariate/__init__.py @@ -3,13 +3,16 @@ measures and others are more distantly related. """ -from .binding_information import binding_information, dual_total_correlation, residual_entropy, variation_of_information from .caekl_mutual_information import caekl_mutual_information from .coinformation import coinformation +from .dual_total_correlation import (binding_information, + dual_total_correlation, + independent_information, residual_entropy, + variation_of_information) from .entropy import entropy from .functional_common_information import functional_common_information from .gk_common_information import gk_common_information from .interaction_information import interaction_information -from .joint_mss import joint_mss_entropy +from .mss_common_information import mss_common_information from .total_correlation import total_correlation from .tse_complexity import tse_complexity diff --git a/dit/multivariate/binding_information.py b/dit/multivariate/dual_total_correlation.py similarity index 82% rename from dit/multivariate/binding_information.py rename to dit/multivariate/dual_total_correlation.py index 1db8bcda7b..e1ba33b0d4 100644 --- a/dit/multivariate/binding_information.py +++ b/dit/multivariate/dual_total_correlation.py @@ -1,5 +1,5 @@ """ -The binding information and residual entropy. +The dual total correlation and variation of information. """ from ..shannon import conditional_entropy as H @@ -7,21 +7,23 @@ __all__ = ('binding_information', 'dual_total_correlation', + 'independent_information', 'residual_entropy', + 'variation_of_information', ) -def binding_information(dist, rvs=None, crvs=None, rv_mode=None): +def dual_total_correlation(dist, rvs=None, crvs=None, rv_mode=None): """ - Calculates the binding information, also known as the dual total - correlation. + Calculates the dual total correlation, also known as the binding + information. Parameters ---------- dist : Distribution - The distribution from which the binding information is calculated. + The distribution from which the dual total correlation is calculated. rvs : list, None The indexes of the random variable used to calculate the binding - information. If None, then the binding information is calculated + information. If None, then the dual total correlation is calculated over all random variables. crvs : list, None The indexes of the random variables to condition on. If None, then no @@ -37,7 +39,7 @@ def binding_information(dist, rvs=None, crvs=None, rv_mode=None): Returns ------- B : float - The binding information. + The dual total correlation. Raises ------ @@ -59,6 +61,8 @@ def binding_information(dist, rvs=None, crvs=None, rv_mode=None): def residual_entropy(dist, rvs=None, crvs=None, rv_mode=None): """ + Compute the residual entropy. 
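+
+    Also known as the erasure entropy or the variation of information, it is
+    the sum of each variable's entropy conditioned on all the other
+    variables, and so measures the information not shared among them.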
+
     Parameters
     ----------
     dist : Distribution
@@ -99,6 +103,6 @@ def residual_entropy(dist, rvs=None, crvs=None, rv_mode=None):
 
     return R
 
-dual_total_correlation = binding_information
+binding_information = dual_total_correlation
 
-variation_of_information = residual_entropy
+independent_information = variation_of_information = residual_entropy
diff --git a/dit/multivariate/functional_common_information.py b/dit/multivariate/functional_common_information.py
index 96b25fa10e..05b685d0e8 100644
--- a/dit/multivariate/functional_common_information.py
+++ b/dit/multivariate/functional_common_information.py
@@ -2,13 +2,16 @@
 The functional common information.
 """
 
-from ..distconst import insert_rvf
+from collections import deque
+from itertools import combinations
+
+from ..distconst import insert_rvf, modify_outcomes
 from ..helpers import flatten, normalize_rvs
 from ..math import close
 from ..utils import partitions
 
 from .entropy import entropy
-from .binding_information import dual_total_correlation
+from .dual_total_correlation import dual_total_correlation
 
 __all__ = ['functional_common_information']
 
@@ -29,11 +32,11 @@ def add_partition(dist, part):
     dist : Distribution
         The original `dist` with the function defined by `part` added.
     """
-    invert_part = {e: str(i) for i, es in enumerate(part) for e in es}
+    invert_part = {e: (i,) for i, es in enumerate(part) for e in es}
     dist = insert_rvf(dist, lambda j: invert_part[j])
     return dist
 
-def functional_markov_chain(dist, rvs=None, crvs=None, rv_mode=None):
+def functional_markov_chain_naive(dist, rvs=None, crvs=None, rv_mode=None):
     """
     Add the smallest function of `dist` which renders `rvs` independent.
 
@@ -71,6 +74,88 @@ def functional_markov_chain_naive(dist, rvs=None, crvs=None, rv_mode=None):
     dists = [ d for d in dists if close(B(d), 0) ]
     return min(dists, key=lambda d: entropy(d, rvs=f, rv_mode=rv_mode))
 
+def functional_markov_chain(dist, rvs=None, crvs=None, rv_mode=None):
+    """
+    Add the smallest function of `dist` which renders `rvs` independent.
+
+    Parameters
+    ----------
+    dist : Distribution
+        The distribution for which the smallest function will be constructed.
+    rvs : list, None
+        A list of lists. Each inner list specifies the indexes of the random
+        variables used to calculate the total correlation. If None, then the
+        total correlation is calculated over all random variables, which is
+        equivalent to passing `rvs=dist.rvs`.
+    crvs : list, None
+        A single list of indexes specifying the random variables to condition
+        on. If None, then no variables are conditioned on.
+    rv_mode : str, None
+        Specifies how to interpret `rvs` and `crvs`. Valid options are:
+        {'indices', 'names'}. If equal to 'indices', then the elements of
+        `crvs` and `rvs` are interpreted as random variable indices. If equal
+        to 'names', then the elements are interpreted as random variable names.
+        If `None`, then the value of `dist._rv_mode` is consulted, which
+        defaults to 'indices'.
+
+    Returns
+    -------
+    d : Distribution
+        The distribution `dist` with the additional variable added to the end.
+
+    Notes
+    -----
+    The implementation of this function is quite slow. It is approximately
+    doubly exponential in the size of the sample space. It is, however,
+    several times faster than the naive method. It remains an open question
+    as to whether a method to directly construct this variable exists (as it
+    does with the GK common variable, minimal sufficient statistic, etc).
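+
+    The search starts from the finest partition of the outcomes (each
+    outcome in its own block) and coarsens it by merging pairs of blocks,
+    stopping early once H[W] falls to the dual total correlation, which
+    lower-bounds it.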
+ """ + rvs, crvs, rv_mode = normalize_rvs(dist, rvs, crvs, rv_mode) + + dist = modify_outcomes(dist, lambda x: tuple(x)) + + part = frozenset([ frozenset([o]) for o in dist.outcomes ]) # make copy + + W = [dist.outcome_length()] + + H = lambda d: entropy(d, W, rv_mode=rv_mode) + B = lambda d: dual_total_correlation(d, rvs, crvs+W, rv_mode) + + optimal_b = dual_total_correlation(dist, rvs, crvs, rv_mode) + + initial = add_partition(dist, part) + optimal = (H(initial), initial) + + queue = deque([part]) + + checked = set() + + while queue: + part = queue.popleft() + + checked.add(part) + + d = add_partition(dist, part) + + if close(B(d), 0): + + h = H(d) + + if h <= optimal[0]: + optimal = (h, d) + + if close(h, optimal_b): + break + + new_parts = [frozenset([ p for p in part if p not in pair ] + + [pair[0]|pair[1]]) + for pair in combinations(part, 2) ] + new_parts = sorted([ part for part in new_parts if part not in checked ], key=lambda p: sorted(map(len, p))) + queue.extendleft(new_parts) + + return optimal[1] + def functional_common_information(dist, rvs=None, crvs=None, rv_mode=None): """ Compute the functional common information, F, of `dist`. It is the entropy @@ -104,4 +189,4 @@ def functional_common_information(dist, rvs=None, crvs=None, rv_mode=None): The functional common information. """ d = functional_markov_chain(dist, rvs, crvs, rv_mode) - return entropy(d.marginalize(list(flatten(dist.rvs)))) + return entropy(d, [dist.outcome_length()]) diff --git a/dit/multivariate/joint_mss.py b/dit/multivariate/mss_common_information.py similarity index 73% rename from dit/multivariate/joint_mss.py rename to dit/multivariate/mss_common_information.py index 8ad2028761..e7997e1c10 100644 --- a/dit/multivariate/joint_mss.py +++ b/dit/multivariate/mss_common_information.py @@ -1,13 +1,16 @@ """ -Compute the entropy of the joint minimal sufficient statistic. +Compute the minimal sufficient statistic common information. """ from ..algorithms.minimal_sufficient_statistic import insert_joint_mss +from ..helpers import normalize_rvs from .entropy import entropy -def joint_mss_entropy(dist, rvs=None, crvs=None, rv_mode=None): +def mss_common_information(dist, rvs=None, crvs=None, rv_mode=None): """ - Compute the entropy of the join of the minimal sufficent statistic of each variable about the others. + Compute the minimal sufficient statistic common information, which is the + entropy of the join of the minimal sufficent statistic of each variable + about the others. Parameters ---------- @@ -26,6 +29,7 @@ def joint_mss_entropy(dist, rvs=None, crvs=None, rv_mode=None): defaults to 'indices'. """ + rvs, crvs, rv_mode = normalize_rvs(dist, rvs, crvs, rv_mode) d = insert_joint_mss(dist, -1, rvs, rv_mode) M = entropy(d, [d.outcome_length() - 1], crvs, rv_mode) diff --git a/dit/multivariate/tests/test_binding_information.py b/dit/multivariate/tests/test_dual_total_correlation.py similarity index 93% rename from dit/multivariate/tests/test_binding_information.py rename to dit/multivariate/tests/test_dual_total_correlation.py index 5029d5e195..11f4308589 100644 --- a/dit/multivariate/tests/test_binding_information.py +++ b/dit/multivariate/tests/test_dual_total_correlation.py @@ -1,5 +1,5 @@ """ -Tests for dit.multivariate.binding_information. +Tests for dit.multivariate.dual_total_correlation. 
""" from __future__ import division @@ -7,8 +7,8 @@ from nose.tools import assert_almost_equal, assert_raises from dit import Distribution as D, ScalarDistribution as SD -from dit.multivariate import (binding_information as B, - residual_entropy as R) +from dit.multivariate import (dual_total_correlation as B, + residual_entropy as R) from dit.shannon import (entropy as H, mutual_information as I) from dit.exceptions import ditException diff --git a/dit/multivariate/tests/test_functional_common_information.py b/dit/multivariate/tests/test_functional_common_information.py index fae04a5d72..7c8ce77cbf 100644 --- a/dit/multivariate/tests/test_functional_common_information.py +++ b/dit/multivariate/tests/test_functional_common_information.py @@ -4,12 +4,13 @@ from __future__ import division +from nose.plugins.attrib import attr from nose.tools import assert_almost_equal, assert_less_equal from dit import Distribution, random_distribution from dit.multivariate import (functional_common_information as F, dual_total_correlation as B, - joint_mss_entropy as M + mss_common_information as M ) def test_fci1(): @@ -31,11 +32,25 @@ def test_fci2(): assert_almost_equal(F(d, [[0], [1]]), 0.0) assert_almost_equal(F(d, [[0], [1]], [2]), 1.0) +@attr('slow') def test_fci3(): """ Test that B <= F <= M. """ - dists = [ random_distribution(2, 2) for _ in range(10) ] + dists = [ random_distribution(2, 3) for _ in range(3) ] + for d in dists: + b = B(d) + f = F(d) + m = M(d) + yield assert_less_equal, b, f + yield assert_less_equal, f, m + +@attr('slow') +def test_fci4(): + """ + Test that B <= F <= M. + """ + dists = [ random_distribution(3, 2) for _ in range(3) ] for d in dists: b = B(d) f = F(d) diff --git a/dit/multivariate/tests/test_joint_mss.py b/dit/multivariate/tests/test_mss_common_information.py similarity index 95% rename from dit/multivariate/tests/test_joint_mss.py rename to dit/multivariate/tests/test_mss_common_information.py index b014dd3d25..0a9e6fdf6d 100644 --- a/dit/multivariate/tests/test_joint_mss.py +++ b/dit/multivariate/tests/test_mss_common_information.py @@ -7,7 +7,7 @@ from nose.tools import assert_almost_equal from dit import Distribution -from dit.multivariate import joint_mss_entropy as M +from dit.multivariate import mss_common_information as M def test_M1(): """ Test M """ diff --git a/docs/conf.py b/docs/conf.py index 30a300dea4..513816852e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -212,15 +212,21 @@ \newcommand{\op}[1]{\ensuremath{\operatorname{#1}}} \renewcommand{\H} {\op{H}} % No more Erd\H{o}s + \newcommand{\I} {\op{I}} \newcommand{\T} {\op{T}} \newcommand{\B} {\op{B}} \newcommand{\J} {\op{J}} + \newcommand{\R} {\op{R}} \newcommand{\II} {\op{II}} \newcommand{\TSE} {\op{TSE}} \newcommand{\K} {\op{K}} +\newcommand{\C} {\op{C}} +\newcommand{\G} {\op{G}} +\newcommand{\F} {\op{F}} +\newcommand{\M} {\op{M}} \renewcommand{\P} {\op{P}} \newcommand{\X} {\op{X}} diff --git a/docs/measures/multivariate/dual_total_correlation.rst b/docs/measures/multivariate/dual_total_correlation.rst index f988fd0389..c0a238d1f8 100644 --- a/docs/measures/multivariate/dual_total_correlation.rst +++ b/docs/measures/multivariate/dual_total_correlation.rst @@ -1,11 +1,11 @@ .. dual_total_correlation.rst -.. py:module:: dit.multivariate.binding_information +.. 
py:module:: dit.multivariate.dual_total_correlation
 
 **********************
 Dual Total Correlation
 **********************
 
-The dual total correlation :cite:, or binding information :cite:`Abdallah2012`, is yet another generalization of the mutual information. It is the amount of information that is shared among the variables. It is defined as:
+The dual total correlation :cite:`Han1975linear`, or binding information :cite:`Abdallah2012`, is yet another generalization of the mutual information. It is the amount of information that is shared among the variables. It is defined as:
 
 .. math::
diff --git a/docs/measures/multivariate/functional_common_information.rst b/docs/measures/multivariate/functional_common_information.rst
new file mode 100644
index 0000000000..d20aecbf4e
--- /dev/null
+++ b/docs/measures/multivariate/functional_common_information.rst
@@ -0,0 +1,6 @@
+.. functional_common_information.rst
+.. py:module:: dit.multivariate.functional_common_information
+
+*****************************
+Functional Common Information
+*****************************
diff --git a/docs/measures/multivariate/mss_common_information.rst b/docs/measures/multivariate/mss_common_information.rst
new file mode 100644
index 0000000000..108ba8e006
--- /dev/null
+++ b/docs/measures/multivariate/mss_common_information.rst
@@ -0,0 +1,6 @@
+.. mss_common_information.rst
+.. py:module:: dit.multivariate.mss_common_information
+
+**********************
+MSS Common Information
+**********************
diff --git a/docs/measures/multivariate/multivariate.rst b/docs/measures/multivariate/multivariate.rst
index 05cd584ec0..cb4d194eb2 100644
--- a/docs/measures/multivariate/multivariate.rst
+++ b/docs/measures/multivariate/multivariate.rst
@@ -37,7 +37,8 @@ These measures all somehow measure shared information, but do not equal the mutu
    :maxdepth: 1
 
    gk_common_information
-   tse_complexity
+   functional_common_information
+   mss_common_information
 
 Others
 ======
 These measures quantify other aspects of a joint distribution.
 
    :maxdepth: 1
 
    residual_entropy
+   tse_complexity
diff --git a/docs/references.bib b/docs/references.bib
index 60d2d05af5..3aaf13892b 100644
--- a/docs/references.bib
+++ b/docs/references.bib
@@ -68,7 +68,7 @@ @article{Gacs1973
   year={1973}
 }
 @article{Han1980,
-  author = {Han, Te Sun},
+  author = {Han, T. S.},
   doi = {10.1016/S0019-9958(80)90478-7},
   issn = {00199958},
   journal = {Information and Control},
@@ -79,6 +79,16 @@ @article{Han1980
   volume = {46},
   year = {1980}
 }
+@article{Han1975linear,
+  title={Linear dependence structure of the entropy space},
+  author={Han, T. S.},
+  journal={Information and Control},
+  volume={29},
+  pages={337--368},
+  year={1975},
+  publisher={Elsevier}
+}
+
 @article{Verdu2008,
   title={The information lost in erasures},
   author={Verdu, Sergio and Weissman, Tsachy},
diff --git a/site/src/MathJax/local/dit.js b/site/src/MathJax/local/dit.js
index 9277a2d880..099f085caf 100644
--- a/site/src/MathJax/local/dit.js
+++ b/site/src/MathJax/local/dit.js
@@ -32,16 +32,25 @@ MathJax.Hub.Register.StartupHook("TeX Jax Ready",function () {
   var TEX = MathJax.InputJax.TeX;
 
   TEX.Macro("op", "\\operatorname{#1}", 1);
+
   TEX.Macro("H", "\\op{H}");
+
+  // mutual informations
   TEX.Macro("I", "\\op{I}");
   TEX.Macro("T", "\\op{T}");
   TEX.Macro("B", "\\op{B}");
   TEX.Macro("J", "\\op{J}");
+  TEX.Macro("R", "\\op{R}");
   TEX.Macro("II", "\\op{II}");
   TEX.Macro("TSE", "\\op{TSE}");
 
+  // common information
   TEX.Macro("K", "\\op{K}");
+  TEX.Macro("C", "\\op{C}");
+  TEX.Macro("G", "\\op{G}");
+  TEX.Macro("F", "\\op{F}");
+  TEX.Macro("M", "\\op{M}");
 
   TEX.Macro("P", "\\op{P}");
   TEX.Macro("X", "\\op{X}");

From f5b56af320bb5ae475d869bc690934154ff2d327 Mon Sep 17 00:00:00 2001
From: Ryan James
Date: Thu, 26 May 2016 22:33:28 -0700
Subject: [PATCH 5/9] some docs

---
 docs/measures/multivariate/functional_common_information.rst | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/docs/measures/multivariate/functional_common_information.rst b/docs/measures/multivariate/functional_common_information.rst
index d20aecbf4e..ba94a2b9b0 100644
--- a/docs/measures/multivariate/functional_common_information.rst
+++ b/docs/measures/multivariate/functional_common_information.rst
@@ -4,3 +4,8 @@
 *****************************
 Functional Common Information
 *****************************
+
+The functional common information captures the minimum amount of information necessary to capture all of a distribution's shared information using a function of that information. In other words:
+
+.. math::
+    \F[X_{0:n}] = \min_{\substack{W = f(X_{0:n} \\ \B[X_{0:n}|W] = 0} \H[W]

From 8a11c7a6b3227907a1484a8704aacedf0b0c9c29 Mon Sep 17 00:00:00 2001
From: Ryan James
Date: Fri, 27 May 2016 12:57:18 -0700
Subject: [PATCH 6/9] add documentation

---
 docs/conf.py                                  |  5 ++
 .../functional_common_information.rst         | 17 ++++++-
 .../multivariate/mss_common_information.rst   | 11 +++++
 docs/measures/multivariate/multivariate.rst   |  9 ++++
 docs/notation.rst                             | 48 +++++--------------
 site/src/MathJax/local/dit.js                 |  6 +++
 6 files changed, 58 insertions(+), 38 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 513816852e..ac13198d58 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -245,6 +245,11 @@
 \newcommand{\imore}{\succeq}
 \newcommand{\ieq}  {\cong}
 \newcommand{\mss}  {\searrow}
+
+\DeclareMathOperator*{\meetop}{\scalerel*{\meet}{\textstyle\sum}}
+\DeclareMathOperator*{\joinop}{\scalerel*{\join}{\textstyle\sum}}
+
+\newcommand{\ind}{\mathrel{\text{\scalebox{1.07}{$\perp\mkern-10mu\perp$}}}}
 '''
 
 latex_elements = {
diff --git a/docs/measures/multivariate/functional_common_information.rst b/docs/measures/multivariate/functional_common_information.rst
index ba94a2b9b0..6e58871759 100644
--- a/docs/measures/multivariate/functional_common_information.rst
+++ b/docs/measures/multivariate/functional_common_information.rst
@@ -5,4 +5,19 @@
 Functional Common Information
 *****************************
 
 The functional common information captures the minimum amount of information necessary to capture all of a distribution's shared information using a function of that information. In other words:
 
 .. math::
-    \F[X_{0:n}] = \min_{\substack{W = f(X_{0:n} \\ \B[X_{0:n}|W] = 0} \H[W]
+
+    \F[X_{0:n}] = \min_{\substack{\ind X_{0:n} \mid W \\ W = f(X_{0:n})}} \H[W]
+
+Relationship To Other Measures of Common Information
+====================================================
+
+Since this adds a constraint to the exact common information's optimization, it is generally larger than that quantity; and since its constraint is weaker than that of the :doc:`mss_common_information`, it is generally smaller than that measure:
+
+.. math::
+
+    \G[X_{0:n}] \leq \F[X_{0:n}] \leq \M[X_{0:n}]
+
+API
+===
+
+.. autofunction:: functional_common_information
diff --git a/docs/measures/multivariate/mss_common_information.rst b/docs/measures/multivariate/mss_common_information.rst
index 108ba8e006..51138c370a 100644
--- a/docs/measures/multivariate/mss_common_information.rst
+++ b/docs/measures/multivariate/mss_common_information.rst
@@ -4,3 +4,14 @@
 **********************
 MSS Common Information
 **********************
+
+The Minimal Sufficient Statistic Common Information is the entropy of the join of the minimal sufficient statistic of each variable about the others:
+
+.. math::
+
+    \M[X_{0:n}] = \H\left[ \joinop_i X_i \mss X_\overline{\{i\}} \right]
+
+API
+===
+
+.. autofunction:: mss_common_information
diff --git a/docs/measures/multivariate/multivariate.rst b/docs/measures/multivariate/multivariate.rst
index cb4d194eb2..9a7d59866a 100644
--- a/docs/measures/multivariate/multivariate.rst
+++ b/docs/measures/multivariate/multivariate.rst
@@ -40,6 +40,15 @@ These measures all somehow measure shared information, but do not equal the mutu
    functional_common_information
    mss_common_information
 
+Ordering
+--------
+
+The common information measures (together with the :doc:`dual_total_correlation`) form an ordering:
+
+.. math::
+
+    \K[X_{0:n}] \B[X_{0:n}] \leq \F[X_{0:n}] \leq \M[X_{0:n}]
+
 Others
 ======
 These measures quantify other aspects of a joint distribution.
diff --git a/docs/notation.rst b/docs/notation.rst
index 3c5ab34fb5..e574927cef 100644
--- a/docs/notation.rst
+++ b/docs/notation.rst
@@ -4,50 +4,24 @@
 Notation
 ********
 
-``dit`` is a scientific tool, and so, much of this documentation will contain
-mathematical expressions. Here we will describe this notation.
+``dit`` is a scientific tool, and so, much of this documentation will contain mathematical expressions. Here we will describe this notation.
 
 Basic Notation
 ==============
 
-A random variable :math:`X` consists of *outcomes* :math:`x` from an *alphabet*
-:math:`\mathcal{X}`. As such, we write the entropy of a distribution as
-:math:`\H[X] = \sum_{x \in \mathcal{X}} p(x) \log_2 p(x)`, where :math:`p(x)`
-denote the probability of the outcome :math:`x` occuring.
-
-Many distributions are *joint* distribution. In the absence of variable names,
-we index each random variable with a subscript. For example, a distribution over
-three variables is written :math:`X_0X_1X_2`. As a shorthand, we also denote
-those random variables as :math:`X_{0:3}`, meaning start with :math:`X_0` and go
-through, but not including :math:`X_3` — just like python slice notation.
-
-If we ever need to describe an infinitely long chain of variables we drop the
-index from the side that is infinite. So :math:`X_{:0} = \ldots
-X_{-3}X_{-2}X_{-1}` and :math:`X_{0:} = X_0X_1X_2\ldots`. For an arbitrary set
-of indices :math:`A`, the corresponding collection of random variables is
-denoted :math:`X_A`. For example, if :math:`A = \{0,2,4\}`, then :math:`X_A =
-X_0 X_2 X_4`. The complement of :math:`A` (with respect to some universal set)
-is denoted :math:`\bar{A}`.
+A random variable :math:`X` consists of *outcomes* :math:`x` from an *alphabet* :math:`\mathcal{X}`. As such, we write the entropy of a distribution as :math:`\H[X] = -\sum_{x \in \mathcal{X}} p(x) \log_2 p(x)`, where :math:`p(x)` denotes the probability of the outcome :math:`x` occurring.
+
+Many distributions are *joint* distributions. In the absence of variable names, we index each random variable with a subscript. For example, a distribution over three variables is written :math:`X_0X_1X_2`. As a shorthand, we also denote those random variables as :math:`X_{0:3}`, meaning start with :math:`X_0` and go through, but not including :math:`X_3` — just like Python slice notation.
+
+If a set of variables :math:`X_{0:n}` are independent, we will write :math:`\ind X_{0:n}`. If a set of variables :math:`X_{0:n}` are independent conditioned on :math:`V`, we write :math:`\ind X_{0:n} \mid V`.
+
+If we ever need to describe an infinitely long chain of variables we drop the index from the side that is infinite. So :math:`X_{:0} = \ldots X_{-3}X_{-2}X_{-1}` and :math:`X_{0:} = X_0X_1X_2\ldots`. For an arbitrary set of indices :math:`A`, the corresponding collection of random variables is denoted :math:`X_A`. For example, if :math:`A = \{0,2,4\}`, then :math:`X_A = X_0 X_2 X_4`. The complement of :math:`A` (with respect to some universal set) is denoted :math:`\overline{A}`.
 
 Furthermore, we define :math:`0 \log_2 0 = 0`.
 
 Advanced Notation
 =================
 
-When there exists a function :math:`Y = f(X)` we write :math:`X \imore Y`
-meaning that :math:`X` is *informationally richer* than :math:`Y`. Similarly, if
-:math:`f(Y) = X` then we write :math:`X \iless Y` and say that :math:`X` is
-*informationally poorer* than :math:`Y`. If :math:`X \iless Y` and :math:`X
-\imore Y` then we write :math:`X \ieq Y` and say that :math:`X` is
-*informationally equivalent* to :math:`Y`. Of all the variables that are poorer
-than both :math:`X` and :math:`Y`, there is a richest one. This variable is
-known as the *meet* of :math:`X` and :math:`Y` and is denoted :math:`X \meet Y`.
-By definition, :math:`\forall Z s.t. Z \iless X` and :math:`Z \iless Y, Z \iless
-X \meet Y`. Similarly of all variables richer than both :math:`X` and :math:`Y`,
-there is a poorest. This variable is known as the *join* of :math:`X` and
-:math:`Y` and is denoted :math:`X \join Y`. The joint random variable
-:math:`(X,Y)` and the join are informationally equivalent: :math:`(X,Y) \ieq X
-\join Y`.
-
-Lastly, we use :math:`X \mss Y` to denote the minimal sufficient statistic of
-:math:`X` about the random variable :math:`Y`.
+When there exists a function :math:`Y = f(X)` we write :math:`X \imore Y` meaning that :math:`X` is *informationally richer* than :math:`Y`. Similarly, if :math:`f(Y) = X` then we write :math:`X \iless Y` and say that :math:`X` is *informationally poorer* than :math:`Y`. If :math:`X \iless Y` and :math:`X \imore Y` then we write :math:`X \ieq Y` and say that :math:`X` is *informationally equivalent* to :math:`Y`. Of all the variables that are poorer than both :math:`X` and :math:`Y`, there is a richest one. This variable is known as the *meet* of :math:`X` and :math:`Y` and is denoted :math:`X \meet Y`. By definition, :math:`\forall Z s.t. Z \iless X` and :math:`Z \iless Y, Z \iless X \meet Y`. Similarly, of all variables richer than both :math:`X` and :math:`Y`, there is a poorest. This variable is known as the *join* of :math:`X` and :math:`Y` and is denoted :math:`X \join Y`. The joint random variable :math:`(X,Y)` and the join are informationally equivalent: :math:`(X,Y) \ieq X \join Y`.
+
+Lastly, we use :math:`X \mss Y` to denote the minimal sufficient statistic of :math:`X` about the random variable :math:`Y`.
diff --git a/site/src/MathJax/local/dit.js b/site/src/MathJax/local/dit.js
index 099f085caf..a090f42c98 100644
--- a/site/src/MathJax/local/dit.js
+++ b/site/src/MathJax/local/dit.js
@@ -71,6 +71,12 @@
   TEX.Macro("ieq", "\\cong");
   TEX.Macro("mss", "\\searrow");
 
+  TEX.Macro("meetop", "\\DeclareMathOperator*{\\meetop}{\\scalerel*{\\meet}{\\textstyle\\sum}}")
+  TEX.Macro("joinop", "\\DeclareMathOperator*{\\joinop}{\\scalerel*{\\join}{\\textstyle\\sum}}")
+
+  TEX.Macro("ind", "\\mathrel{\\text{\\scalebox{1.07}{$\\perp\\mkern-10mu\\perp$}}}")
+
+
   // don't use stix, it's pretty ugly
   MathJax.Hub.Config({
     "HTML-CSS": { availableFonts: ["TeX"] }

From f0978b726ca477152cb1ece0199f3d703e48f8dc Mon Sep 17 00:00:00 2001
From: Ryan James
Date: Fri, 27 May 2016 13:13:58 -0700
Subject: [PATCH 7/9] missed a \leq

---
 docs/measures/multivariate/multivariate.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/measures/multivariate/multivariate.rst b/docs/measures/multivariate/multivariate.rst
index 9a7d59866a..4df01de4fd 100644
--- a/docs/measures/multivariate/multivariate.rst
+++ b/docs/measures/multivariate/multivariate.rst
@@ -47,7 +47,7 @@ The common information measures (together with the :doc:`dual_total_correlation`
 
 .. math::
 
-    \K[X_{0:n}] \B[X_{0:n}] \leq \F[X_{0:n}] \leq \M[X_{0:n}]
+    \K[X_{0:n}] \leq \B[X_{0:n}] \leq \F[X_{0:n}] \leq \M[X_{0:n}]
 
 Others
 ======

From 966e1cce6ed26a86eb13a74bbbe788b53cc9c0a0 Mon Sep 17 00:00:00 2001
From: Ryan James
Date: Fri, 27 May 2016 13:42:50 -0700
Subject: [PATCH 8/9] don't cover the naive version

---
 dit/multivariate/functional_common_information.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dit/multivariate/functional_common_information.py b/dit/multivariate/functional_common_information.py
index 05b685d0e8..68cbe3e024 100644
--- a/dit/multivariate/functional_common_information.py
+++ b/dit/multivariate/functional_common_information.py
@@ -36,7 +36,7 @@ def add_partition(dist, part):
     dist = insert_rvf(dist, lambda j: invert_part[j])
     return dist
 
-def functional_markov_chain_naive(dist, rvs=None, crvs=None, rv_mode=None):
+def functional_markov_chain_naive(dist, rvs=None, crvs=None, rv_mode=None): # pragma: no cover
     """
     Add the smallest function of `dist` which renders `rvs` independent.
From 0c4a149e045421e8dc4fabb24f3ece996e01a605 Mon Sep 17 00:00:00 2001 From: Ryan James Date: Fri, 27 May 2016 14:04:21 -0700 Subject: [PATCH 9/9] more tests, better coverage --- .../functional_common_information.py | 2 +- .../test_functional_common_information.py | 26 +++++++++++++++-- dit/profiles/entropy_triangle.py | 2 +- dit/profiles/information_partitions.py | 2 +- dit/profiles/tests/test_entropy_triangle.py | 28 ++++++++++++++++++- 5 files changed, 54 insertions(+), 6 deletions(-) diff --git a/dit/multivariate/functional_common_information.py b/dit/multivariate/functional_common_information.py index 68cbe3e024..d2ac0360ad 100644 --- a/dit/multivariate/functional_common_information.py +++ b/dit/multivariate/functional_common_information.py @@ -131,7 +131,7 @@ def functional_markov_chain(dist, rvs=None, crvs=None, rv_mode=None): checked = set() - while queue: + while queue: # pragma: no branch part = queue.popleft() checked.add(part) diff --git a/dit/multivariate/tests/test_functional_common_information.py b/dit/multivariate/tests/test_functional_common_information.py index 7c8ce77cbf..f5ece67a53 100644 --- a/dit/multivariate/tests/test_functional_common_information.py +++ b/dit/multivariate/tests/test_functional_common_information.py @@ -32,8 +32,30 @@ def test_fci2(): assert_almost_equal(F(d, [[0], [1]]), 0.0) assert_almost_equal(F(d, [[0], [1]], [2]), 1.0) -@attr('slow') def test_fci3(): + """ Test against known values """ + outcomes = ['000', + 'a00', + '00c', + 'a0c', + '011', + 'a11', + '101', + 'b01', + '01d', + 'a1d', + '10d', + 'b0d', + '110', + 'b10', + '11c', + 'b1c',] + pmf = [1/16]*16 + d = Distribution(outcomes, pmf) + assert_almost_equal(F(d), 2.0) + +@attr('slow') +def test_fci4(): """ Test that B <= F <= M. """ @@ -46,7 +68,7 @@ def test_fci3(): yield assert_less_equal, f, m @attr('slow') -def test_fci4(): +def test_fci5(): """ Test that B <= F <= M. """ diff --git a/dit/profiles/entropy_triangle.py b/dit/profiles/entropy_triangle.py index 1d618bba0a..edcbc291eb 100644 --- a/dit/profiles/entropy_triangle.py +++ b/dit/profiles/entropy_triangle.py @@ -61,7 +61,7 @@ def __init__(self, dists): @staticmethod @abstractmethod - def _compute_point(dist): + def _compute_point(dist): # pragma: no cover """ Compute the three normalized axis. diff --git a/dit/profiles/information_partitions.py b/dit/profiles/information_partitions.py index 9f22f257be..25059da187 100644 --- a/dit/profiles/information_partitions.py +++ b/dit/profiles/information_partitions.py @@ -62,7 +62,7 @@ def __init__(self, dist): @staticmethod @abstractmethod - def _symbol(rvs, crvs): + def _symbol(rvs, crvs): # pragma: no cover """ This method should return the information symbol for an atom. """ diff --git a/dit/profiles/tests/test_entropy_triangle.py b/dit/profiles/tests/test_entropy_triangle.py index fa2dd7fca5..e659f8c6b7 100644 --- a/dit/profiles/tests/test_entropy_triangle.py +++ b/dit/profiles/tests/test_entropy_triangle.py @@ -4,7 +4,7 @@ from __future__ import division -from nose.tools import assert_tuple_equal +from nose.tools import assert_in, assert_tuple_equal from dit import Distribution from dit.profiles import EntropyTriangle, EntropyTriangle2 @@ -27,6 +27,19 @@ def test_et_1(): for d, val in zip(examples, vals): yield assert_tuple_equal, EntropyTriangle(d).points[0], val +def test_et_2(): + """ + Test EntropyTriangle against known values. 
+ """ + vals = [(0, 0, 1), + (0, 1, 0), + (0, 2/3, 1/3), + (0, 1, 0), + ] + et = EntropyTriangle(examples) + for val in vals: + yield assert_in, val, et.points + def test_et2_1(): """ @@ -39,3 +52,16 @@ def test_et2_1(): ] for d, val in zip(examples, vals): yield assert_tuple_equal, EntropyTriangle2(d).points[0], val + +def test_et_2(): + """ + Test EntropyTriangle against known values. + """ + vals = [(1, 0, 0), + (0, 2/3, 1/3), + (1/3, 1/3, 1/3), + (0, 1/3, 2/3), + ] + et = EntropyTriangle2(examples) + for val in vals: + yield assert_in, val, et.points